1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 16 { 17 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 18 PetscErrorCode ierr; 19 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 20 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 21 PetscReal shift = 0.0; 22 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 23 24 PetscFunctionBegin; 25 allowzeropivot = PetscNot(A->erroriffailure); 26 27 if (a->idiagvalid) { 28 if (values) *values = a->idiag; 29 PetscFunctionReturn(0); 30 } 31 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 32 diag_offset = a->diag; 33 if (!a->idiag) { 34 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 35 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 36 } 37 diag = a->idiag; 38 mdiag = a->idiag+bs2*mbs; 39 if (values) *values = a->idiag; 40 /* factor and invert each block */ 41 switch (bs) { 42 case 1: 43 for (i=0; i<mbs; i++) { 44 odiag = v + 1*diag_offset[i]; 45 diag[0] = odiag[0]; 46 mdiag[0] = odiag[0]; 47 48 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 49 if (allowzeropivot) { 50 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 51 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 52 A->factorerror_zeropivot_row = i; 53 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 54 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 55 } 56 57 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 58 diag += 1; 59 mdiag += 1; 60 } 61 break; 62 case 2: 63 for (i=0; i<mbs; i++) { 64 odiag = v + 4*diag_offset[i]; 65 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 66 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 67 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 68 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 69 diag += 4; 70 mdiag += 4; 71 } 72 break; 73 case 3: 74 for (i=0; i<mbs; i++) { 75 odiag = v + 9*diag_offset[i]; 76 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 77 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 78 diag[8] = odiag[8]; 79 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 80 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 81 mdiag[8] = odiag[8]; 82 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 83 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 94 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 105 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 106 diag += 25; 107 mdiag += 25; 108 } 109 break; 110 case 6: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 36*diag_offset[i]; 113 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 115 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 116 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 117 diag += 36; 118 mdiag += 36; 119 } 120 break; 121 case 7: 122 for (i=0; i<mbs; i++) { 123 odiag = v + 49*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += 49; 129 mdiag += 49; 130 } 131 break; 132 default: 133 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 134 for (i=0; i<mbs; i++) { 135 odiag = v + bs2*diag_offset[i]; 136 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 138 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 139 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 140 diag += bs2; 141 mdiag += bs2; 142 } 143 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 144 } 145 a->idiagvalid = PETSC_TRUE; 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 150 { 151 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 152 PetscScalar *x,*work,*w,*workt,*t; 153 const MatScalar *v,*aa = a->a, *idiag; 154 const PetscScalar *b,*xb; 155 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 162 its = its*lits; 163 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 164 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 165 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 166 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 167 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 168 169 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 170 171 if (!m) PetscFunctionReturn(0); 172 diag = a->diag; 173 idiag = a->idiag; 174 k = PetscMax(A->rmap->n,A->cmap->n); 175 if (!a->mult_work) { 176 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 177 } 178 if (!a->sor_workt) { 179 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 180 } 181 if (!a->sor_work) { 182 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 183 } 184 work = a->mult_work; 185 t = a->sor_workt; 186 w = a->sor_work; 187 188 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 189 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 190 191 if (flag & SOR_ZERO_INITIAL_GUESS) { 192 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 193 switch (bs) { 194 case 1: 195 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 196 t[0] = b[0]; 197 i2 = 1; 198 idiag += 1; 199 for (i=1; i<m; i++) { 200 v = aa + ai[i]; 201 vi = aj + ai[i]; 202 nz = diag[i] - ai[i]; 203 s[0] = b[i2]; 204 for (j=0; j<nz; j++) { 205 xw[0] = x[vi[j]]; 206 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 207 } 208 t[i2] = s[0]; 209 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 210 x[i2] = xw[0]; 211 idiag += 1; 212 i2 += 1; 213 } 214 break; 215 case 2: 216 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 217 t[0] = b[0]; t[1] = b[1]; 218 i2 = 2; 219 idiag += 4; 220 for (i=1; i<m; i++) { 221 v = aa + 4*ai[i]; 222 vi = aj + ai[i]; 223 nz = diag[i] - ai[i]; 224 s[0] = b[i2]; s[1] = b[i2+1]; 225 for (j=0; j<nz; j++) { 226 idx = 2*vi[j]; 227 it = 4*j; 228 xw[0] = x[idx]; xw[1] = x[1+idx]; 229 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 230 } 231 t[i2] = s[0]; t[i2+1] = s[1]; 232 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 233 x[i2] = xw[0]; x[i2+1] = xw[1]; 234 idiag += 4; 235 i2 += 2; 236 } 237 break; 238 case 3: 239 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 240 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 241 i2 = 3; 242 idiag += 9; 243 for (i=1; i<m; i++) { 244 v = aa + 9*ai[i]; 245 vi = aj + ai[i]; 246 nz = diag[i] - ai[i]; 247 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 248 while (nz--) { 249 idx = 3*(*vi++); 250 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 251 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 252 v += 9; 253 } 254 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 255 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 256 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 257 idiag += 9; 258 i2 += 3; 259 } 260 break; 261 case 4: 262 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 263 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 264 i2 = 4; 265 idiag += 16; 266 for (i=1; i<m; i++) { 267 v = aa + 16*ai[i]; 268 vi = aj + ai[i]; 269 nz = diag[i] - ai[i]; 270 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 271 while (nz--) { 272 idx = 4*(*vi++); 273 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 274 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 275 v += 16; 276 } 277 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 278 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 279 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 280 idiag += 16; 281 i2 += 4; 282 } 283 break; 284 case 5: 285 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 286 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 287 i2 = 5; 288 idiag += 25; 289 for (i=1; i<m; i++) { 290 v = aa + 25*ai[i]; 291 vi = aj + ai[i]; 292 nz = diag[i] - ai[i]; 293 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 294 while (nz--) { 295 idx = 5*(*vi++); 296 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 298 v += 25; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 301 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 302 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 303 idiag += 25; 304 i2 += 5; 305 } 306 break; 307 case 6: 308 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 309 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 310 i2 = 6; 311 idiag += 36; 312 for (i=1; i<m; i++) { 313 v = aa + 36*ai[i]; 314 vi = aj + ai[i]; 315 nz = diag[i] - ai[i]; 316 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 317 while (nz--) { 318 idx = 6*(*vi++); 319 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 320 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 321 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 322 v += 36; 323 } 324 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 325 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 326 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 327 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 328 idiag += 36; 329 i2 += 6; 330 } 331 break; 332 case 7: 333 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 334 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 335 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 336 i2 = 7; 337 idiag += 49; 338 for (i=1; i<m; i++) { 339 v = aa + 49*ai[i]; 340 vi = aj + ai[i]; 341 nz = diag[i] - ai[i]; 342 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 343 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 344 while (nz--) { 345 idx = 7*(*vi++); 346 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 347 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 348 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 349 v += 49; 350 } 351 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 352 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 353 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 354 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 355 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 356 idiag += 49; 357 i2 += 7; 358 } 359 break; 360 default: 361 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 362 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 363 i2 = bs; 364 idiag += bs2; 365 for (i=1; i<m; i++) { 366 v = aa + bs2*ai[i]; 367 vi = aj + ai[i]; 368 nz = diag[i] - ai[i]; 369 370 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 371 /* copy all rows of x that are needed into contiguous space */ 372 workt = work; 373 for (j=0; j<nz; j++) { 374 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 375 workt += bs; 376 } 377 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 378 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 379 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 380 381 idiag += bs2; 382 i2 += bs; 383 } 384 break; 385 } 386 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 387 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 388 xb = t; 389 } 390 else xb = b; 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 392 idiag = a->idiag+bs2*(a->mbs-1); 393 i2 = bs * (m-1); 394 switch (bs) { 395 case 1: 396 s[0] = xb[i2]; 397 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 398 x[i2] = xw[0]; 399 i2 -= 1; 400 for (i=m-2; i>=0; i--) { 401 v = aa + (diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s[0] = xb[i2]; 405 for (j=0; j<nz; j++) { 406 xw[0] = x[vi[j]]; 407 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 408 } 409 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 410 x[i2] = xw[0]; 411 idiag -= 1; 412 i2 -= 1; 413 } 414 break; 415 case 2: 416 s[0] = xb[i2]; s[1] = xb[i2+1]; 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 i2 -= 2; 420 idiag -= 4; 421 for (i=m-2; i>=0; i--) { 422 v = aa + 4*(diag[i] + 1); 423 vi = aj + diag[i] + 1; 424 nz = ai[i+1] - diag[i] - 1; 425 s[0] = xb[i2]; s[1] = xb[i2+1]; 426 for (j=0; j<nz; j++) { 427 idx = 2*vi[j]; 428 it = 4*j; 429 xw[0] = x[idx]; xw[1] = x[1+idx]; 430 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 431 } 432 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 433 x[i2] = xw[0]; x[i2+1] = xw[1]; 434 idiag -= 4; 435 i2 -= 2; 436 } 437 break; 438 case 3: 439 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 i2 -= 3; 443 idiag -= 9; 444 for (i=m-2; i>=0; i--) { 445 v = aa + 9*(diag[i]+1); 446 vi = aj + diag[i] + 1; 447 nz = ai[i+1] - diag[i] - 1; 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 449 while (nz--) { 450 idx = 3*(*vi++); 451 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 452 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 453 v += 9; 454 } 455 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 456 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 457 idiag -= 9; 458 i2 -= 3; 459 } 460 break; 461 case 4: 462 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 i2 -= 4; 466 idiag -= 16; 467 for (i=m-2; i>=0; i--) { 468 v = aa + 16*(diag[i]+1); 469 vi = aj + diag[i] + 1; 470 nz = ai[i+1] - diag[i] - 1; 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 472 while (nz--) { 473 idx = 4*(*vi++); 474 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 475 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 476 v += 16; 477 } 478 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 479 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 480 idiag -= 16; 481 i2 -= 4; 482 } 483 break; 484 case 5: 485 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 i2 -= 5; 489 idiag -= 25; 490 for (i=m-2; i>=0; i--) { 491 v = aa + 25*(diag[i]+1); 492 vi = aj + diag[i] + 1; 493 nz = ai[i+1] - diag[i] - 1; 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 495 while (nz--) { 496 idx = 5*(*vi++); 497 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 499 v += 25; 500 } 501 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 503 idiag -= 25; 504 i2 -= 5; 505 } 506 break; 507 case 6: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 509 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 510 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 511 i2 -= 6; 512 idiag -= 36; 513 for (i=m-2; i>=0; i--) { 514 v = aa + 36*(diag[i]+1); 515 vi = aj + diag[i] + 1; 516 nz = ai[i+1] - diag[i] - 1; 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 518 while (nz--) { 519 idx = 6*(*vi++); 520 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 521 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 522 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 523 v += 36; 524 } 525 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 526 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 527 idiag -= 36; 528 i2 -= 6; 529 } 530 break; 531 case 7: 532 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 533 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 534 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 535 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 536 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 537 i2 -= 7; 538 idiag -= 49; 539 for (i=m-2; i>=0; i--) { 540 v = aa + 49*(diag[i]+1); 541 vi = aj + diag[i] + 1; 542 nz = ai[i+1] - diag[i] - 1; 543 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 544 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 545 while (nz--) { 546 idx = 7*(*vi++); 547 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 548 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 549 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 550 v += 49; 551 } 552 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 553 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 554 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 555 idiag -= 49; 556 i2 -= 7; 557 } 558 break; 559 default: 560 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 561 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 562 i2 -= bs; 563 idiag -= bs2; 564 for (i=m-2; i>=0; i--) { 565 v = aa + bs2*(diag[i]+1); 566 vi = aj + diag[i] + 1; 567 nz = ai[i+1] - diag[i] - 1; 568 569 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 570 /* copy all rows of x that are needed into contiguous space */ 571 workt = work; 572 for (j=0; j<nz; j++) { 573 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 574 workt += bs; 575 } 576 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 577 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 578 579 idiag -= bs2; 580 i2 -= bs; 581 } 582 break; 583 } 584 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 585 } 586 its--; 587 } 588 while (its--) { 589 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 590 idiag = a->idiag; 591 i2 = 0; 592 switch (bs) { 593 case 1: 594 for (i=0; i<m; i++) { 595 v = aa + ai[i]; 596 vi = aj + ai[i]; 597 nz = ai[i+1] - ai[i]; 598 s[0] = b[i2]; 599 for (j=0; j<nz; j++) { 600 xw[0] = x[vi[j]]; 601 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 602 } 603 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 604 x[i2] += xw[0]; 605 idiag += 1; 606 i2 += 1; 607 } 608 break; 609 case 2: 610 for (i=0; i<m; i++) { 611 v = aa + 4*ai[i]; 612 vi = aj + ai[i]; 613 nz = ai[i+1] - ai[i]; 614 s[0] = b[i2]; s[1] = b[i2+1]; 615 for (j=0; j<nz; j++) { 616 idx = 2*vi[j]; 617 it = 4*j; 618 xw[0] = x[idx]; xw[1] = x[1+idx]; 619 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 620 } 621 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 622 x[i2] += xw[0]; x[i2+1] += xw[1]; 623 idiag += 4; 624 i2 += 2; 625 } 626 break; 627 case 3: 628 for (i=0; i<m; i++) { 629 v = aa + 9*ai[i]; 630 vi = aj + ai[i]; 631 nz = ai[i+1] - ai[i]; 632 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 633 while (nz--) { 634 idx = 3*(*vi++); 635 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 636 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 637 v += 9; 638 } 639 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 640 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 641 idiag += 9; 642 i2 += 3; 643 } 644 break; 645 case 4: 646 for (i=0; i<m; i++) { 647 v = aa + 16*ai[i]; 648 vi = aj + ai[i]; 649 nz = ai[i+1] - ai[i]; 650 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 651 while (nz--) { 652 idx = 4*(*vi++); 653 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 654 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 655 v += 16; 656 } 657 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 658 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 659 idiag += 16; 660 i2 += 4; 661 } 662 break; 663 case 5: 664 for (i=0; i<m; i++) { 665 v = aa + 25*ai[i]; 666 vi = aj + ai[i]; 667 nz = ai[i+1] - ai[i]; 668 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 672 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 673 v += 25; 674 } 675 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 676 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 677 idiag += 25; 678 i2 += 5; 679 } 680 break; 681 case 6: 682 for (i=0; i<m; i++) { 683 v = aa + 36*ai[i]; 684 vi = aj + ai[i]; 685 nz = ai[i+1] - ai[i]; 686 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 687 while (nz--) { 688 idx = 6*(*vi++); 689 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 690 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 691 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 692 v += 36; 693 } 694 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 695 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 696 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 697 idiag += 36; 698 i2 += 6; 699 } 700 break; 701 case 7: 702 for (i=0; i<m; i++) { 703 v = aa + 49*ai[i]; 704 vi = aj + ai[i]; 705 nz = ai[i+1] - ai[i]; 706 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 707 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 708 while (nz--) { 709 idx = 7*(*vi++); 710 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 711 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 712 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 713 v += 49; 714 } 715 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 716 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 717 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 718 idiag += 49; 719 i2 += 7; 720 } 721 break; 722 default: 723 for (i=0; i<m; i++) { 724 v = aa + bs2*ai[i]; 725 vi = aj + ai[i]; 726 nz = ai[i+1] - ai[i]; 727 728 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 729 /* copy all rows of x that are needed into contiguous space */ 730 workt = work; 731 for (j=0; j<nz; j++) { 732 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 733 workt += bs; 734 } 735 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 736 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 737 738 idiag += bs2; 739 i2 += bs; 740 } 741 break; 742 } 743 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 744 } 745 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 746 idiag = a->idiag+bs2*(a->mbs-1); 747 i2 = bs * (m-1); 748 switch (bs) { 749 case 1: 750 for (i=m-1; i>=0; i--) { 751 v = aa + ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; 755 for (j=0; j<nz; j++) { 756 xw[0] = x[vi[j]]; 757 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 758 } 759 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 760 x[i2] += xw[0]; 761 idiag -= 1; 762 i2 -= 1; 763 } 764 break; 765 case 2: 766 for (i=m-1; i>=0; i--) { 767 v = aa + 4*ai[i]; 768 vi = aj + ai[i]; 769 nz = ai[i+1] - ai[i]; 770 s[0] = b[i2]; s[1] = b[i2+1]; 771 for (j=0; j<nz; j++) { 772 idx = 2*vi[j]; 773 it = 4*j; 774 xw[0] = x[idx]; xw[1] = x[1+idx]; 775 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 776 } 777 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 778 x[i2] += xw[0]; x[i2+1] += xw[1]; 779 idiag -= 4; 780 i2 -= 2; 781 } 782 break; 783 case 3: 784 for (i=m-1; i>=0; i--) { 785 v = aa + 9*ai[i]; 786 vi = aj + ai[i]; 787 nz = ai[i+1] - ai[i]; 788 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 789 while (nz--) { 790 idx = 3*(*vi++); 791 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 792 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 793 v += 9; 794 } 795 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 796 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 797 idiag -= 9; 798 i2 -= 3; 799 } 800 break; 801 case 4: 802 for (i=m-1; i>=0; i--) { 803 v = aa + 16*ai[i]; 804 vi = aj + ai[i]; 805 nz = ai[i+1] - ai[i]; 806 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 807 while (nz--) { 808 idx = 4*(*vi++); 809 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 810 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 811 v += 16; 812 } 813 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 814 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 815 idiag -= 16; 816 i2 -= 4; 817 } 818 break; 819 case 5: 820 for (i=m-1; i>=0; i--) { 821 v = aa + 25*ai[i]; 822 vi = aj + ai[i]; 823 nz = ai[i+1] - ai[i]; 824 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 825 while (nz--) { 826 idx = 5*(*vi++); 827 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 828 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 829 v += 25; 830 } 831 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 832 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 833 idiag -= 25; 834 i2 -= 5; 835 } 836 break; 837 case 6: 838 for (i=m-1; i>=0; i--) { 839 v = aa + 36*ai[i]; 840 vi = aj + ai[i]; 841 nz = ai[i+1] - ai[i]; 842 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 843 while (nz--) { 844 idx = 6*(*vi++); 845 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 846 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 847 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 848 v += 36; 849 } 850 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 851 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 852 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 853 idiag -= 36; 854 i2 -= 6; 855 } 856 break; 857 case 7: 858 for (i=m-1; i>=0; i--) { 859 v = aa + 49*ai[i]; 860 vi = aj + ai[i]; 861 nz = ai[i+1] - ai[i]; 862 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 863 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 864 while (nz--) { 865 idx = 7*(*vi++); 866 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 867 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 868 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 869 v += 49; 870 } 871 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 872 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 873 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 874 idiag -= 49; 875 i2 -= 7; 876 } 877 break; 878 default: 879 for (i=m-1; i>=0; i--) { 880 v = aa + bs2*ai[i]; 881 vi = aj + ai[i]; 882 nz = ai[i+1] - ai[i]; 883 884 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 885 /* copy all rows of x that are needed into contiguous space */ 886 workt = work; 887 for (j=0; j<nz; j++) { 888 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 889 workt += bs; 890 } 891 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 892 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 893 894 idiag -= bs2; 895 i2 -= bs; 896 } 897 break; 898 } 899 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 900 } 901 } 902 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 903 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 908 /* 909 Special version for direct calls from Fortran (Used in PETSc-fun3d) 910 */ 911 #if defined(PETSC_HAVE_FORTRAN_CAPS) 912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 914 #define matsetvaluesblocked4_ matsetvaluesblocked4 915 #endif 916 917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 918 { 919 Mat A = *AA; 920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 921 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 922 PetscInt *ai =a->i,*ailen=a->ilen; 923 PetscInt *aj =a->j,stepval,lastcol = -1; 924 const PetscScalar *value = v; 925 MatScalar *ap,*aa = a->a,*bap; 926 927 PetscFunctionBegin; 928 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 929 stepval = (n-1)*4; 930 for (k=0; k<m; k++) { /* loop over added rows */ 931 row = im[k]; 932 rp = aj + ai[row]; 933 ap = aa + 16*ai[row]; 934 nrow = ailen[row]; 935 low = 0; 936 high = nrow; 937 for (l=0; l<n; l++) { /* loop over added columns */ 938 col = in[l]; 939 if (col <= lastcol) low = 0; 940 else high = nrow; 941 lastcol = col; 942 value = v + k*(stepval+4 + l)*4; 943 while (high-low > 7) { 944 t = (low+high)/2; 945 if (rp[t] > col) high = t; 946 else low = t; 947 } 948 for (i=low; i<high; i++) { 949 if (rp[i] > col) break; 950 if (rp[i] == col) { 951 bap = ap + 16*i; 952 for (ii=0; ii<4; ii++,value+=stepval) { 953 for (jj=ii; jj<16; jj+=4) { 954 bap[jj] += *value++; 955 } 956 } 957 goto noinsert2; 958 } 959 } 960 N = nrow++ - 1; 961 high++; /* added new column index thus must search to one higher than before */ 962 /* shift up all the later entries in this row */ 963 for (ii=N; ii>=i; ii--) { 964 rp[ii+1] = rp[ii]; 965 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 966 } 967 if (N >= i) { 968 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 969 } 970 rp[i] = col; 971 bap = ap + 16*i; 972 for (ii=0; ii<4; ii++,value+=stepval) { 973 for (jj=ii; jj<16; jj+=4) { 974 bap[jj] = *value++; 975 } 976 } 977 noinsert2:; 978 low = i; 979 } 980 ailen[row] = nrow; 981 } 982 PetscFunctionReturnVoid(); 983 } 984 985 #if defined(PETSC_HAVE_FORTRAN_CAPS) 986 #define matsetvalues4_ MATSETVALUES4 987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 988 #define matsetvalues4_ matsetvalues4 989 #endif 990 991 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 992 { 993 Mat A = *AA; 994 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 995 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 996 PetscInt *ai=a->i,*ailen=a->ilen; 997 PetscInt *aj=a->j,brow,bcol; 998 PetscInt ridx,cidx,lastcol = -1; 999 MatScalar *ap,value,*aa=a->a,*bap; 1000 1001 PetscFunctionBegin; 1002 for (k=0; k<m; k++) { /* loop over added rows */ 1003 row = im[k]; brow = row/4; 1004 rp = aj + ai[brow]; 1005 ap = aa + 16*ai[brow]; 1006 nrow = ailen[brow]; 1007 low = 0; 1008 high = nrow; 1009 for (l=0; l<n; l++) { /* loop over added columns */ 1010 col = in[l]; bcol = col/4; 1011 ridx = row % 4; cidx = col % 4; 1012 value = v[l + k*n]; 1013 if (col <= lastcol) low = 0; 1014 else high = nrow; 1015 lastcol = col; 1016 while (high-low > 7) { 1017 t = (low+high)/2; 1018 if (rp[t] > bcol) high = t; 1019 else low = t; 1020 } 1021 for (i=low; i<high; i++) { 1022 if (rp[i] > bcol) break; 1023 if (rp[i] == bcol) { 1024 bap = ap + 16*i + 4*cidx + ridx; 1025 *bap += value; 1026 goto noinsert1; 1027 } 1028 } 1029 N = nrow++ - 1; 1030 high++; /* added new column thus must search to one higher than before */ 1031 /* shift up all the later entries in this row */ 1032 for (ii=N; ii>=i; ii--) { 1033 rp[ii+1] = rp[ii]; 1034 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1035 } 1036 if (N>=i) { 1037 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1038 } 1039 rp[i] = bcol; 1040 ap[16*i + 4*cidx + ridx] = value; 1041 noinsert1:; 1042 low = i; 1043 } 1044 ailen[brow] = nrow; 1045 } 1046 PetscFunctionReturnVoid(); 1047 } 1048 1049 /* 1050 Checks for missing diagonals 1051 */ 1052 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1053 { 1054 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 PetscInt *diag,*ii = a->i,i; 1057 1058 PetscFunctionBegin; 1059 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1060 *missing = PETSC_FALSE; 1061 if (A->rmap->n > 0 && !ii) { 1062 *missing = PETSC_TRUE; 1063 if (d) *d = 0; 1064 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1065 } else { 1066 diag = a->diag; 1067 for (i=0; i<a->mbs; i++) { 1068 if (diag[i] >= ii[i+1]) { 1069 *missing = PETSC_TRUE; 1070 if (d) *d = i; 1071 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1072 break; 1073 } 1074 } 1075 } 1076 PetscFunctionReturn(0); 1077 } 1078 1079 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1080 { 1081 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1082 PetscErrorCode ierr; 1083 PetscInt i,j,m = a->mbs; 1084 1085 PetscFunctionBegin; 1086 if (!a->diag) { 1087 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1088 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1089 a->free_diag = PETSC_TRUE; 1090 } 1091 for (i=0; i<m; i++) { 1092 a->diag[i] = a->i[i+1]; 1093 for (j=a->i[i]; j<a->i[i+1]; j++) { 1094 if (a->j[j] == i) { 1095 a->diag[i] = j; 1096 break; 1097 } 1098 } 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 1104 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1105 { 1106 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1109 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1110 1111 PetscFunctionBegin; 1112 *nn = n; 1113 if (!ia) PetscFunctionReturn(0); 1114 if (symmetric) { 1115 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1116 nz = tia[n]; 1117 } else { 1118 tia = a->i; tja = a->j; 1119 } 1120 1121 if (!blockcompressed && bs > 1) { 1122 (*nn) *= bs; 1123 /* malloc & create the natural set of indices */ 1124 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1125 if (n) { 1126 (*ia)[0] = oshift; 1127 for (j=1; j<bs; j++) { 1128 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1129 } 1130 } 1131 1132 for (i=1; i<n; i++) { 1133 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1134 for (j=1; j<bs; j++) { 1135 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1136 } 1137 } 1138 if (n) { 1139 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1140 } 1141 1142 if (inja) { 1143 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1144 cnt = 0; 1145 for (i=0; i<n; i++) { 1146 for (j=0; j<bs; j++) { 1147 for (k=tia[i]; k<tia[i+1]; k++) { 1148 for (l=0; l<bs; l++) { 1149 (*ja)[cnt++] = bs*tja[k] + l; 1150 } 1151 } 1152 } 1153 } 1154 } 1155 1156 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1157 ierr = PetscFree(tia);CHKERRQ(ierr); 1158 ierr = PetscFree(tja);CHKERRQ(ierr); 1159 } 1160 } else if (oshift == 1) { 1161 if (symmetric) { 1162 nz = tia[A->rmap->n/bs]; 1163 /* add 1 to i and j indices */ 1164 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1165 *ia = tia; 1166 if (ja) { 1167 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1168 *ja = tja; 1169 } 1170 } else { 1171 nz = a->i[A->rmap->n/bs]; 1172 /* malloc space and add 1 to i and j indices */ 1173 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1174 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1175 if (ja) { 1176 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1177 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1178 } 1179 } 1180 } else { 1181 *ia = tia; 1182 if (ja) *ja = tja; 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1188 { 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 if (!ia) PetscFunctionReturn(0); 1193 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1194 ierr = PetscFree(*ia);CHKERRQ(ierr); 1195 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1196 } 1197 PetscFunctionReturn(0); 1198 } 1199 1200 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1201 { 1202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 #if defined(PETSC_USE_LOG) 1207 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1208 #endif 1209 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1210 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1211 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1212 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1213 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1214 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1215 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1216 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1217 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1218 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1219 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1220 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1221 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1222 1223 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1224 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1225 ierr = PetscFree(A->data);CHKERRQ(ierr); 1226 1227 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1238 #if defined(PETSC_HAVE_HYPRE) 1239 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1240 #endif 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1245 { 1246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 switch (op) { 1251 case MAT_ROW_ORIENTED: 1252 a->roworiented = flg; 1253 break; 1254 case MAT_KEEP_NONZERO_PATTERN: 1255 a->keepnonzeropattern = flg; 1256 break; 1257 case MAT_NEW_NONZERO_LOCATIONS: 1258 a->nonew = (flg ? 0 : 1); 1259 break; 1260 case MAT_NEW_NONZERO_LOCATION_ERR: 1261 a->nonew = (flg ? -1 : 0); 1262 break; 1263 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1264 a->nonew = (flg ? -2 : 0); 1265 break; 1266 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1267 a->nounused = (flg ? -1 : 0); 1268 break; 1269 case MAT_NEW_DIAGONALS: 1270 case MAT_IGNORE_OFF_PROC_ENTRIES: 1271 case MAT_USE_HASH_TABLE: 1272 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1273 break; 1274 case MAT_SPD: 1275 case MAT_SYMMETRIC: 1276 case MAT_STRUCTURALLY_SYMMETRIC: 1277 case MAT_HERMITIAN: 1278 case MAT_SYMMETRY_ETERNAL: 1279 case MAT_SUBMAT_SINGLEIS: 1280 /* These options are handled directly by MatSetOption() */ 1281 break; 1282 default: 1283 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1284 } 1285 PetscFunctionReturn(0); 1286 } 1287 1288 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1289 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1290 { 1291 PetscErrorCode ierr; 1292 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1293 MatScalar *aa_i; 1294 PetscScalar *v_i; 1295 1296 PetscFunctionBegin; 1297 bs = A->rmap->bs; 1298 bs2 = bs*bs; 1299 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1300 1301 bn = row/bs; /* Block number */ 1302 bp = row % bs; /* Block Position */ 1303 M = ai[bn+1] - ai[bn]; 1304 *nz = bs*M; 1305 1306 if (v) { 1307 *v = 0; 1308 if (*nz) { 1309 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1310 for (i=0; i<M; i++) { /* for each block in the block row */ 1311 v_i = *v + i*bs; 1312 aa_i = aa + bs2*(ai[bn] + i); 1313 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1314 } 1315 } 1316 } 1317 1318 if (idx) { 1319 *idx = 0; 1320 if (*nz) { 1321 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1322 for (i=0; i<M; i++) { /* for each block in the block row */ 1323 idx_i = *idx + i*bs; 1324 itmp = bs*aj[ai[bn] + i]; 1325 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1326 } 1327 } 1328 } 1329 PetscFunctionReturn(0); 1330 } 1331 1332 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1333 { 1334 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1335 PetscErrorCode ierr; 1336 1337 PetscFunctionBegin; 1338 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1343 { 1344 PetscErrorCode ierr; 1345 1346 PetscFunctionBegin; 1347 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1348 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1349 PetscFunctionReturn(0); 1350 } 1351 1352 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1353 1354 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1355 { 1356 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1357 Mat C; 1358 PetscErrorCode ierr; 1359 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1360 PetscInt *rows,*cols,bs2=a->bs2; 1361 MatScalar *array; 1362 1363 PetscFunctionBegin; 1364 if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1365 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1366 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1367 1368 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1369 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1370 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1371 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1372 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1373 ierr = PetscFree(col);CHKERRQ(ierr); 1374 } else { 1375 C = *B; 1376 } 1377 1378 array = a->a; 1379 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1380 for (i=0; i<mbs; i++) { 1381 cols[0] = i*bs; 1382 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1383 len = ai[i+1] - ai[i]; 1384 for (j=0; j<len; j++) { 1385 rows[0] = (*aj++)*bs; 1386 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1387 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1388 array += bs2; 1389 } 1390 } 1391 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1392 1393 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1394 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 1396 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1397 *B = C; 1398 } else { 1399 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1405 { 1406 PetscErrorCode ierr; 1407 Mat Btrans; 1408 1409 PetscFunctionBegin; 1410 *f = PETSC_FALSE; 1411 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1412 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1413 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1414 PetscFunctionReturn(0); 1415 } 1416 1417 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1418 { 1419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1420 PetscErrorCode ierr; 1421 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1422 int fd; 1423 PetscScalar *aa; 1424 FILE *file; 1425 1426 PetscFunctionBegin; 1427 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1428 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1429 col_lens[0] = MAT_FILE_CLASSID; 1430 1431 col_lens[1] = A->rmap->N; 1432 col_lens[2] = A->cmap->n; 1433 col_lens[3] = a->nz*bs2; 1434 1435 /* store lengths of each row and write (including header) to file */ 1436 count = 0; 1437 for (i=0; i<a->mbs; i++) { 1438 for (j=0; j<bs; j++) { 1439 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1440 } 1441 } 1442 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1443 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1444 1445 /* store column indices (zero start index) */ 1446 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1447 count = 0; 1448 for (i=0; i<a->mbs; i++) { 1449 for (j=0; j<bs; j++) { 1450 for (k=a->i[i]; k<a->i[i+1]; k++) { 1451 for (l=0; l<bs; l++) { 1452 jj[count++] = bs*a->j[k] + l; 1453 } 1454 } 1455 } 1456 } 1457 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1458 ierr = PetscFree(jj);CHKERRQ(ierr); 1459 1460 /* store nonzero values */ 1461 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1462 count = 0; 1463 for (i=0; i<a->mbs; i++) { 1464 for (j=0; j<bs; j++) { 1465 for (k=a->i[i]; k<a->i[i+1]; k++) { 1466 for (l=0; l<bs; l++) { 1467 aa[count++] = a->a[bs2*k + l*bs + j]; 1468 } 1469 } 1470 } 1471 } 1472 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1473 ierr = PetscFree(aa);CHKERRQ(ierr); 1474 1475 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1476 if (file) { 1477 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1478 } 1479 PetscFunctionReturn(0); 1480 } 1481 1482 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1483 { 1484 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1485 PetscErrorCode ierr; 1486 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1487 PetscViewerFormat format; 1488 1489 PetscFunctionBegin; 1490 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1491 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1492 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1493 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1494 const char *matname; 1495 Mat aij; 1496 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1497 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1498 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1499 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1500 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1501 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1502 PetscFunctionReturn(0); 1503 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1504 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1505 for (i=0; i<a->mbs; i++) { 1506 for (j=0; j<bs; j++) { 1507 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1508 for (k=a->i[i]; k<a->i[i+1]; k++) { 1509 for (l=0; l<bs; l++) { 1510 #if defined(PETSC_USE_COMPLEX) 1511 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1512 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1513 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1514 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1515 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1516 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1517 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1518 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1519 } 1520 #else 1521 if (a->a[bs2*k + l*bs + j] != 0.0) { 1522 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1523 } 1524 #endif 1525 } 1526 } 1527 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1528 } 1529 } 1530 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1531 } else { 1532 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1533 for (i=0; i<a->mbs; i++) { 1534 for (j=0; j<bs; j++) { 1535 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1536 for (k=a->i[i]; k<a->i[i+1]; k++) { 1537 for (l=0; l<bs; l++) { 1538 #if defined(PETSC_USE_COMPLEX) 1539 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1540 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1541 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1542 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1543 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1544 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1545 } else { 1546 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1547 } 1548 #else 1549 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1550 #endif 1551 } 1552 } 1553 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1554 } 1555 } 1556 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1557 } 1558 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #include <petscdraw.h> 1563 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1564 { 1565 Mat A = (Mat) Aa; 1566 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1567 PetscErrorCode ierr; 1568 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1569 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1570 MatScalar *aa; 1571 PetscViewer viewer; 1572 PetscViewerFormat format; 1573 1574 PetscFunctionBegin; 1575 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1576 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1577 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1578 1579 /* loop over matrix elements drawing boxes */ 1580 1581 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1582 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1583 /* Blue for negative, Cyan for zero and Red for positive */ 1584 color = PETSC_DRAW_BLUE; 1585 for (i=0,row=0; i<mbs; i++,row+=bs) { 1586 for (j=a->i[i]; j<a->i[i+1]; j++) { 1587 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1588 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1589 aa = a->a + j*bs2; 1590 for (k=0; k<bs; k++) { 1591 for (l=0; l<bs; l++) { 1592 if (PetscRealPart(*aa++) >= 0.) continue; 1593 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1594 } 1595 } 1596 } 1597 } 1598 color = PETSC_DRAW_CYAN; 1599 for (i=0,row=0; i<mbs; i++,row+=bs) { 1600 for (j=a->i[i]; j<a->i[i+1]; j++) { 1601 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1602 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1603 aa = a->a + j*bs2; 1604 for (k=0; k<bs; k++) { 1605 for (l=0; l<bs; l++) { 1606 if (PetscRealPart(*aa++) != 0.) continue; 1607 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1608 } 1609 } 1610 } 1611 } 1612 color = PETSC_DRAW_RED; 1613 for (i=0,row=0; i<mbs; i++,row+=bs) { 1614 for (j=a->i[i]; j<a->i[i+1]; j++) { 1615 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1616 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1617 aa = a->a + j*bs2; 1618 for (k=0; k<bs; k++) { 1619 for (l=0; l<bs; l++) { 1620 if (PetscRealPart(*aa++) <= 0.) continue; 1621 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1622 } 1623 } 1624 } 1625 } 1626 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1627 } else { 1628 /* use contour shading to indicate magnitude of values */ 1629 /* first determine max of all nonzero values */ 1630 PetscReal minv = 0.0, maxv = 0.0; 1631 PetscDraw popup; 1632 1633 for (i=0; i<a->nz*a->bs2; i++) { 1634 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1635 } 1636 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1637 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1638 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1639 1640 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1641 for (i=0,row=0; i<mbs; i++,row+=bs) { 1642 for (j=a->i[i]; j<a->i[i+1]; j++) { 1643 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1644 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1645 aa = a->a + j*bs2; 1646 for (k=0; k<bs; k++) { 1647 for (l=0; l<bs; l++) { 1648 MatScalar v = *aa++; 1649 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1650 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1651 } 1652 } 1653 } 1654 } 1655 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1656 } 1657 PetscFunctionReturn(0); 1658 } 1659 1660 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1661 { 1662 PetscErrorCode ierr; 1663 PetscReal xl,yl,xr,yr,w,h; 1664 PetscDraw draw; 1665 PetscBool isnull; 1666 1667 PetscFunctionBegin; 1668 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1669 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1670 if (isnull) PetscFunctionReturn(0); 1671 1672 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1673 xr += w; yr += h; xl = -w; yl = -h; 1674 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1675 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1676 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1677 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1678 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1683 { 1684 PetscErrorCode ierr; 1685 PetscBool iascii,isbinary,isdraw; 1686 1687 PetscFunctionBegin; 1688 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1689 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1690 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1691 if (iascii) { 1692 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1693 } else if (isbinary) { 1694 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1695 } else if (isdraw) { 1696 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1697 } else { 1698 Mat B; 1699 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1700 ierr = MatView(B,viewer);CHKERRQ(ierr); 1701 ierr = MatDestroy(&B);CHKERRQ(ierr); 1702 } 1703 PetscFunctionReturn(0); 1704 } 1705 1706 1707 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1708 { 1709 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1710 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1711 PetscInt *ai = a->i,*ailen = a->ilen; 1712 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1713 MatScalar *ap,*aa = a->a; 1714 1715 PetscFunctionBegin; 1716 for (k=0; k<m; k++) { /* loop over rows */ 1717 row = im[k]; brow = row/bs; 1718 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1719 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1720 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1721 nrow = ailen[brow]; 1722 for (l=0; l<n; l++) { /* loop over columns */ 1723 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1724 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1725 col = in[l]; 1726 bcol = col/bs; 1727 cidx = col%bs; 1728 ridx = row%bs; 1729 high = nrow; 1730 low = 0; /* assume unsorted */ 1731 while (high-low > 5) { 1732 t = (low+high)/2; 1733 if (rp[t] > bcol) high = t; 1734 else low = t; 1735 } 1736 for (i=low; i<high; i++) { 1737 if (rp[i] > bcol) break; 1738 if (rp[i] == bcol) { 1739 *v++ = ap[bs2*i+bs*cidx+ridx]; 1740 goto finished; 1741 } 1742 } 1743 *v++ = 0.0; 1744 finished:; 1745 } 1746 } 1747 PetscFunctionReturn(0); 1748 } 1749 1750 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1751 { 1752 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1753 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1754 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1755 PetscErrorCode ierr; 1756 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1757 PetscBool roworiented=a->roworiented; 1758 const PetscScalar *value = v; 1759 MatScalar *ap,*aa = a->a,*bap; 1760 1761 PetscFunctionBegin; 1762 if (roworiented) { 1763 stepval = (n-1)*bs; 1764 } else { 1765 stepval = (m-1)*bs; 1766 } 1767 for (k=0; k<m; k++) { /* loop over added rows */ 1768 row = im[k]; 1769 if (row < 0) continue; 1770 #if defined(PETSC_USE_DEBUG) 1771 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1772 #endif 1773 rp = aj + ai[row]; 1774 ap = aa + bs2*ai[row]; 1775 rmax = imax[row]; 1776 nrow = ailen[row]; 1777 low = 0; 1778 high = nrow; 1779 for (l=0; l<n; l++) { /* loop over added columns */ 1780 if (in[l] < 0) continue; 1781 #if defined(PETSC_USE_DEBUG) 1782 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1783 #endif 1784 col = in[l]; 1785 if (roworiented) { 1786 value = v + (k*(stepval+bs) + l)*bs; 1787 } else { 1788 value = v + (l*(stepval+bs) + k)*bs; 1789 } 1790 if (col <= lastcol) low = 0; 1791 else high = nrow; 1792 lastcol = col; 1793 while (high-low > 7) { 1794 t = (low+high)/2; 1795 if (rp[t] > col) high = t; 1796 else low = t; 1797 } 1798 for (i=low; i<high; i++) { 1799 if (rp[i] > col) break; 1800 if (rp[i] == col) { 1801 bap = ap + bs2*i; 1802 if (roworiented) { 1803 if (is == ADD_VALUES) { 1804 for (ii=0; ii<bs; ii++,value+=stepval) { 1805 for (jj=ii; jj<bs2; jj+=bs) { 1806 bap[jj] += *value++; 1807 } 1808 } 1809 } else { 1810 for (ii=0; ii<bs; ii++,value+=stepval) { 1811 for (jj=ii; jj<bs2; jj+=bs) { 1812 bap[jj] = *value++; 1813 } 1814 } 1815 } 1816 } else { 1817 if (is == ADD_VALUES) { 1818 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1819 for (jj=0; jj<bs; jj++) { 1820 bap[jj] += value[jj]; 1821 } 1822 bap += bs; 1823 } 1824 } else { 1825 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1826 for (jj=0; jj<bs; jj++) { 1827 bap[jj] = value[jj]; 1828 } 1829 bap += bs; 1830 } 1831 } 1832 } 1833 goto noinsert2; 1834 } 1835 } 1836 if (nonew == 1) goto noinsert2; 1837 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1838 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1839 N = nrow++ - 1; high++; 1840 /* shift up all the later entries in this row */ 1841 for (ii=N; ii>=i; ii--) { 1842 rp[ii+1] = rp[ii]; 1843 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1844 } 1845 if (N >= i) { 1846 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1847 } 1848 rp[i] = col; 1849 bap = ap + bs2*i; 1850 if (roworiented) { 1851 for (ii=0; ii<bs; ii++,value+=stepval) { 1852 for (jj=ii; jj<bs2; jj+=bs) { 1853 bap[jj] = *value++; 1854 } 1855 } 1856 } else { 1857 for (ii=0; ii<bs; ii++,value+=stepval) { 1858 for (jj=0; jj<bs; jj++) { 1859 *bap++ = *value++; 1860 } 1861 } 1862 } 1863 noinsert2:; 1864 low = i; 1865 } 1866 ailen[row] = nrow; 1867 } 1868 PetscFunctionReturn(0); 1869 } 1870 1871 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1872 { 1873 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1874 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1875 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1876 PetscErrorCode ierr; 1877 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1878 MatScalar *aa = a->a,*ap; 1879 PetscReal ratio=0.6; 1880 1881 PetscFunctionBegin; 1882 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1883 1884 if (m) rmax = ailen[0]; 1885 for (i=1; i<mbs; i++) { 1886 /* move each row back by the amount of empty slots (fshift) before it*/ 1887 fshift += imax[i-1] - ailen[i-1]; 1888 rmax = PetscMax(rmax,ailen[i]); 1889 if (fshift) { 1890 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1891 N = ailen[i]; 1892 for (j=0; j<N; j++) { 1893 ip[j-fshift] = ip[j]; 1894 1895 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1896 } 1897 } 1898 ai[i] = ai[i-1] + ailen[i-1]; 1899 } 1900 if (mbs) { 1901 fshift += imax[mbs-1] - ailen[mbs-1]; 1902 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1903 } 1904 1905 /* reset ilen and imax for each row */ 1906 a->nonzerorowcnt = 0; 1907 for (i=0; i<mbs; i++) { 1908 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1909 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1910 } 1911 a->nz = ai[mbs]; 1912 1913 /* diagonals may have moved, so kill the diagonal pointers */ 1914 a->idiagvalid = PETSC_FALSE; 1915 if (fshift && a->diag) { 1916 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1917 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1918 a->diag = 0; 1919 } 1920 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1921 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1922 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1923 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1924 1925 A->info.mallocs += a->reallocs; 1926 a->reallocs = 0; 1927 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1928 a->rmax = rmax; 1929 1930 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1931 PetscFunctionReturn(0); 1932 } 1933 1934 /* 1935 This function returns an array of flags which indicate the locations of contiguous 1936 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1937 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1938 Assume: sizes should be long enough to hold all the values. 1939 */ 1940 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1941 { 1942 PetscInt i,j,k,row; 1943 PetscBool flg; 1944 1945 PetscFunctionBegin; 1946 for (i=0,j=0; i<n; j++) { 1947 row = idx[i]; 1948 if (row%bs!=0) { /* Not the begining of a block */ 1949 sizes[j] = 1; 1950 i++; 1951 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1952 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1953 i++; 1954 } else { /* Begining of the block, so check if the complete block exists */ 1955 flg = PETSC_TRUE; 1956 for (k=1; k<bs; k++) { 1957 if (row+k != idx[i+k]) { /* break in the block */ 1958 flg = PETSC_FALSE; 1959 break; 1960 } 1961 } 1962 if (flg) { /* No break in the bs */ 1963 sizes[j] = bs; 1964 i += bs; 1965 } else { 1966 sizes[j] = 1; 1967 i++; 1968 } 1969 } 1970 } 1971 *bs_max = j; 1972 PetscFunctionReturn(0); 1973 } 1974 1975 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1976 { 1977 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1978 PetscErrorCode ierr; 1979 PetscInt i,j,k,count,*rows; 1980 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1981 PetscScalar zero = 0.0; 1982 MatScalar *aa; 1983 const PetscScalar *xx; 1984 PetscScalar *bb; 1985 1986 PetscFunctionBegin; 1987 /* fix right hand side if needed */ 1988 if (x && b) { 1989 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1990 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1991 for (i=0; i<is_n; i++) { 1992 bb[is_idx[i]] = diag*xx[is_idx[i]]; 1993 } 1994 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1995 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1996 } 1997 1998 /* Make a copy of the IS and sort it */ 1999 /* allocate memory for rows,sizes */ 2000 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2001 2002 /* copy IS values to rows, and sort them */ 2003 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2004 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2005 2006 if (baij->keepnonzeropattern) { 2007 for (i=0; i<is_n; i++) sizes[i] = 1; 2008 bs_max = is_n; 2009 } else { 2010 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2011 A->nonzerostate++; 2012 } 2013 2014 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2015 row = rows[j]; 2016 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2017 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2018 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2019 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2020 if (diag != (PetscScalar)0.0) { 2021 if (baij->ilen[row/bs] > 0) { 2022 baij->ilen[row/bs] = 1; 2023 baij->j[baij->i[row/bs]] = row/bs; 2024 2025 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2026 } 2027 /* Now insert all the diagonal values for this bs */ 2028 for (k=0; k<bs; k++) { 2029 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2030 } 2031 } else { /* (diag == 0.0) */ 2032 baij->ilen[row/bs] = 0; 2033 } /* end (diag == 0.0) */ 2034 } else { /* (sizes[i] != bs) */ 2035 #if defined(PETSC_USE_DEBUG) 2036 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2037 #endif 2038 for (k=0; k<count; k++) { 2039 aa[0] = zero; 2040 aa += bs; 2041 } 2042 if (diag != (PetscScalar)0.0) { 2043 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2044 } 2045 } 2046 } 2047 2048 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2049 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2050 PetscFunctionReturn(0); 2051 } 2052 2053 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2054 { 2055 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2056 PetscErrorCode ierr; 2057 PetscInt i,j,k,count; 2058 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2059 PetscScalar zero = 0.0; 2060 MatScalar *aa; 2061 const PetscScalar *xx; 2062 PetscScalar *bb; 2063 PetscBool *zeroed,vecs = PETSC_FALSE; 2064 2065 PetscFunctionBegin; 2066 /* fix right hand side if needed */ 2067 if (x && b) { 2068 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2069 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2070 vecs = PETSC_TRUE; 2071 } 2072 2073 /* zero the columns */ 2074 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2075 for (i=0; i<is_n; i++) { 2076 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2077 zeroed[is_idx[i]] = PETSC_TRUE; 2078 } 2079 for (i=0; i<A->rmap->N; i++) { 2080 if (!zeroed[i]) { 2081 row = i/bs; 2082 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2083 for (k=0; k<bs; k++) { 2084 col = bs*baij->j[j] + k; 2085 if (zeroed[col]) { 2086 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2087 if (vecs) bb[i] -= aa[0]*xx[col]; 2088 aa[0] = 0.0; 2089 } 2090 } 2091 } 2092 } else if (vecs) bb[i] = diag*xx[i]; 2093 } 2094 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2095 if (vecs) { 2096 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2097 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2098 } 2099 2100 /* zero the rows */ 2101 for (i=0; i<is_n; i++) { 2102 row = is_idx[i]; 2103 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2104 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2105 for (k=0; k<count; k++) { 2106 aa[0] = zero; 2107 aa += bs; 2108 } 2109 if (diag != (PetscScalar)0.0) { 2110 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2111 } 2112 } 2113 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2114 PetscFunctionReturn(0); 2115 } 2116 2117 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2118 { 2119 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2120 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2121 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2122 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2123 PetscErrorCode ierr; 2124 PetscInt ridx,cidx,bs2=a->bs2; 2125 PetscBool roworiented=a->roworiented; 2126 MatScalar *ap,value,*aa=a->a,*bap; 2127 2128 PetscFunctionBegin; 2129 for (k=0; k<m; k++) { /* loop over added rows */ 2130 row = im[k]; 2131 brow = row/bs; 2132 if (row < 0) continue; 2133 #if defined(PETSC_USE_DEBUG) 2134 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2135 #endif 2136 rp = aj + ai[brow]; 2137 ap = aa + bs2*ai[brow]; 2138 rmax = imax[brow]; 2139 nrow = ailen[brow]; 2140 low = 0; 2141 high = nrow; 2142 for (l=0; l<n; l++) { /* loop over added columns */ 2143 if (in[l] < 0) continue; 2144 #if defined(PETSC_USE_DEBUG) 2145 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2146 #endif 2147 col = in[l]; bcol = col/bs; 2148 ridx = row % bs; cidx = col % bs; 2149 if (roworiented) { 2150 value = v[l + k*n]; 2151 } else { 2152 value = v[k + l*m]; 2153 } 2154 if (col <= lastcol) low = 0; else high = nrow; 2155 lastcol = col; 2156 while (high-low > 7) { 2157 t = (low+high)/2; 2158 if (rp[t] > bcol) high = t; 2159 else low = t; 2160 } 2161 for (i=low; i<high; i++) { 2162 if (rp[i] > bcol) break; 2163 if (rp[i] == bcol) { 2164 bap = ap + bs2*i + bs*cidx + ridx; 2165 if (is == ADD_VALUES) *bap += value; 2166 else *bap = value; 2167 goto noinsert1; 2168 } 2169 } 2170 if (nonew == 1) goto noinsert1; 2171 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2172 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2173 N = nrow++ - 1; high++; 2174 /* shift up all the later entries in this row */ 2175 for (ii=N; ii>=i; ii--) { 2176 rp[ii+1] = rp[ii]; 2177 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2178 } 2179 if (N>=i) { 2180 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2181 } 2182 rp[i] = bcol; 2183 ap[bs2*i + bs*cidx + ridx] = value; 2184 a->nz++; 2185 A->nonzerostate++; 2186 noinsert1:; 2187 low = i; 2188 } 2189 ailen[brow] = nrow; 2190 } 2191 PetscFunctionReturn(0); 2192 } 2193 2194 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2195 { 2196 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2197 Mat outA; 2198 PetscErrorCode ierr; 2199 PetscBool row_identity,col_identity; 2200 2201 PetscFunctionBegin; 2202 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2203 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2204 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2205 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2206 2207 outA = inA; 2208 inA->factortype = MAT_FACTOR_LU; 2209 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2210 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2211 2212 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2213 2214 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2215 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2216 a->row = row; 2217 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2218 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2219 a->col = col; 2220 2221 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2222 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2223 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2224 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2225 2226 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2227 if (!a->solve_work) { 2228 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2229 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2230 } 2231 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2232 PetscFunctionReturn(0); 2233 } 2234 2235 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2236 { 2237 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2238 PetscInt i,nz,mbs; 2239 2240 PetscFunctionBegin; 2241 nz = baij->maxnz; 2242 mbs = baij->mbs; 2243 for (i=0; i<nz; i++) { 2244 baij->j[i] = indices[i]; 2245 } 2246 baij->nz = nz; 2247 for (i=0; i<mbs; i++) { 2248 baij->ilen[i] = baij->imax[i]; 2249 } 2250 PetscFunctionReturn(0); 2251 } 2252 2253 /*@ 2254 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2255 in the matrix. 2256 2257 Input Parameters: 2258 + mat - the SeqBAIJ matrix 2259 - indices - the column indices 2260 2261 Level: advanced 2262 2263 Notes: 2264 This can be called if you have precomputed the nonzero structure of the 2265 matrix and want to provide it to the matrix object to improve the performance 2266 of the MatSetValues() operation. 2267 2268 You MUST have set the correct numbers of nonzeros per row in the call to 2269 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2270 2271 MUST be called before any calls to MatSetValues(); 2272 2273 @*/ 2274 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2275 { 2276 PetscErrorCode ierr; 2277 2278 PetscFunctionBegin; 2279 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2280 PetscValidPointer(indices,2); 2281 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2282 PetscFunctionReturn(0); 2283 } 2284 2285 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2286 { 2287 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2288 PetscErrorCode ierr; 2289 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2290 PetscReal atmp; 2291 PetscScalar *x,zero = 0.0; 2292 MatScalar *aa; 2293 PetscInt ncols,brow,krow,kcol; 2294 2295 PetscFunctionBegin; 2296 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2297 bs = A->rmap->bs; 2298 aa = a->a; 2299 ai = a->i; 2300 aj = a->j; 2301 mbs = a->mbs; 2302 2303 ierr = VecSet(v,zero);CHKERRQ(ierr); 2304 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2305 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2306 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2307 for (i=0; i<mbs; i++) { 2308 ncols = ai[1] - ai[0]; ai++; 2309 brow = bs*i; 2310 for (j=0; j<ncols; j++) { 2311 for (kcol=0; kcol<bs; kcol++) { 2312 for (krow=0; krow<bs; krow++) { 2313 atmp = PetscAbsScalar(*aa);aa++; 2314 row = brow + krow; /* row index */ 2315 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2316 } 2317 } 2318 aj++; 2319 } 2320 } 2321 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2322 PetscFunctionReturn(0); 2323 } 2324 2325 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2326 { 2327 PetscErrorCode ierr; 2328 2329 PetscFunctionBegin; 2330 /* If the two matrices have the same copy implementation, use fast copy. */ 2331 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2333 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2334 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2335 2336 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2337 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2338 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2339 } else { 2340 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2341 } 2342 PetscFunctionReturn(0); 2343 } 2344 2345 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2346 { 2347 PetscErrorCode ierr; 2348 2349 PetscFunctionBegin; 2350 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2351 PetscFunctionReturn(0); 2352 } 2353 2354 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2355 { 2356 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2357 2358 PetscFunctionBegin; 2359 *array = a->a; 2360 PetscFunctionReturn(0); 2361 } 2362 2363 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2364 { 2365 PetscFunctionBegin; 2366 PetscFunctionReturn(0); 2367 } 2368 2369 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2370 { 2371 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2372 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2373 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2374 PetscErrorCode ierr; 2375 2376 PetscFunctionBegin; 2377 /* Set the number of nonzeros in the new matrix */ 2378 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2383 { 2384 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2385 PetscErrorCode ierr; 2386 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2387 PetscBLASInt one=1; 2388 2389 PetscFunctionBegin; 2390 if (str == SAME_NONZERO_PATTERN) { 2391 PetscScalar alpha = a; 2392 PetscBLASInt bnz; 2393 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2394 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2395 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2396 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2397 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2398 } else { 2399 Mat B; 2400 PetscInt *nnz; 2401 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2402 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2403 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2404 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2405 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2406 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2407 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2408 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2409 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2410 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2411 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2412 ierr = PetscFree(nnz);CHKERRQ(ierr); 2413 } 2414 PetscFunctionReturn(0); 2415 } 2416 2417 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2418 { 2419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2420 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2421 MatScalar *aa = a->a; 2422 2423 PetscFunctionBegin; 2424 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2425 PetscFunctionReturn(0); 2426 } 2427 2428 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2429 { 2430 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2431 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2432 MatScalar *aa = a->a; 2433 2434 PetscFunctionBegin; 2435 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2436 PetscFunctionReturn(0); 2437 } 2438 2439 /* 2440 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2441 */ 2442 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2443 { 2444 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2445 PetscErrorCode ierr; 2446 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2447 PetscInt nz = a->i[m],row,*jj,mr,col; 2448 2449 PetscFunctionBegin; 2450 *nn = n; 2451 if (!ia) PetscFunctionReturn(0); 2452 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2453 else { 2454 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2455 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2456 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2457 jj = a->j; 2458 for (i=0; i<nz; i++) { 2459 collengths[jj[i]]++; 2460 } 2461 cia[0] = oshift; 2462 for (i=0; i<n; i++) { 2463 cia[i+1] = cia[i] + collengths[i]; 2464 } 2465 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2466 jj = a->j; 2467 for (row=0; row<m; row++) { 2468 mr = a->i[row+1] - a->i[row]; 2469 for (i=0; i<mr; i++) { 2470 col = *jj++; 2471 2472 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2473 } 2474 } 2475 ierr = PetscFree(collengths);CHKERRQ(ierr); 2476 *ia = cia; *ja = cja; 2477 } 2478 PetscFunctionReturn(0); 2479 } 2480 2481 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2482 { 2483 PetscErrorCode ierr; 2484 2485 PetscFunctionBegin; 2486 if (!ia) PetscFunctionReturn(0); 2487 ierr = PetscFree(*ia);CHKERRQ(ierr); 2488 ierr = PetscFree(*ja);CHKERRQ(ierr); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 /* 2493 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2494 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2495 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2496 */ 2497 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2498 { 2499 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2500 PetscErrorCode ierr; 2501 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2502 PetscInt nz = a->i[m],row,*jj,mr,col; 2503 PetscInt *cspidx; 2504 2505 PetscFunctionBegin; 2506 *nn = n; 2507 if (!ia) PetscFunctionReturn(0); 2508 2509 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2510 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2511 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2512 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2513 jj = a->j; 2514 for (i=0; i<nz; i++) { 2515 collengths[jj[i]]++; 2516 } 2517 cia[0] = oshift; 2518 for (i=0; i<n; i++) { 2519 cia[i+1] = cia[i] + collengths[i]; 2520 } 2521 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2522 jj = a->j; 2523 for (row=0; row<m; row++) { 2524 mr = a->i[row+1] - a->i[row]; 2525 for (i=0; i<mr; i++) { 2526 col = *jj++; 2527 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2528 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2529 } 2530 } 2531 ierr = PetscFree(collengths);CHKERRQ(ierr); 2532 *ia = cia; *ja = cja; 2533 *spidx = cspidx; 2534 PetscFunctionReturn(0); 2535 } 2536 2537 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2538 { 2539 PetscErrorCode ierr; 2540 2541 PetscFunctionBegin; 2542 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2543 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2544 PetscFunctionReturn(0); 2545 } 2546 2547 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2548 { 2549 PetscErrorCode ierr; 2550 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2551 2552 PetscFunctionBegin; 2553 if (!Y->preallocated || !aij->nz) { 2554 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2555 } 2556 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 /* -------------------------------------------------------------------*/ 2561 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2562 MatGetRow_SeqBAIJ, 2563 MatRestoreRow_SeqBAIJ, 2564 MatMult_SeqBAIJ_N, 2565 /* 4*/ MatMultAdd_SeqBAIJ_N, 2566 MatMultTranspose_SeqBAIJ, 2567 MatMultTransposeAdd_SeqBAIJ, 2568 0, 2569 0, 2570 0, 2571 /* 10*/ 0, 2572 MatLUFactor_SeqBAIJ, 2573 0, 2574 0, 2575 MatTranspose_SeqBAIJ, 2576 /* 15*/ MatGetInfo_SeqBAIJ, 2577 MatEqual_SeqBAIJ, 2578 MatGetDiagonal_SeqBAIJ, 2579 MatDiagonalScale_SeqBAIJ, 2580 MatNorm_SeqBAIJ, 2581 /* 20*/ 0, 2582 MatAssemblyEnd_SeqBAIJ, 2583 MatSetOption_SeqBAIJ, 2584 MatZeroEntries_SeqBAIJ, 2585 /* 24*/ MatZeroRows_SeqBAIJ, 2586 0, 2587 0, 2588 0, 2589 0, 2590 /* 29*/ MatSetUp_SeqBAIJ, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /* 34*/ MatDuplicate_SeqBAIJ, 2596 0, 2597 0, 2598 MatILUFactor_SeqBAIJ, 2599 0, 2600 /* 39*/ MatAXPY_SeqBAIJ, 2601 MatCreateSubMatrices_SeqBAIJ, 2602 MatIncreaseOverlap_SeqBAIJ, 2603 MatGetValues_SeqBAIJ, 2604 MatCopy_SeqBAIJ, 2605 /* 44*/ 0, 2606 MatScale_SeqBAIJ, 2607 MatShift_SeqBAIJ, 2608 0, 2609 MatZeroRowsColumns_SeqBAIJ, 2610 /* 49*/ 0, 2611 MatGetRowIJ_SeqBAIJ, 2612 MatRestoreRowIJ_SeqBAIJ, 2613 MatGetColumnIJ_SeqBAIJ, 2614 MatRestoreColumnIJ_SeqBAIJ, 2615 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2616 0, 2617 0, 2618 0, 2619 MatSetValuesBlocked_SeqBAIJ, 2620 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2621 MatDestroy_SeqBAIJ, 2622 MatView_SeqBAIJ, 2623 0, 2624 0, 2625 /* 64*/ 0, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2631 0, 2632 MatConvert_Basic, 2633 0, 2634 0, 2635 /* 74*/ 0, 2636 MatFDColoringApply_BAIJ, 2637 0, 2638 0, 2639 0, 2640 /* 79*/ 0, 2641 0, 2642 0, 2643 0, 2644 MatLoad_SeqBAIJ, 2645 /* 84*/ 0, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /* 89*/ 0, 2651 0, 2652 0, 2653 0, 2654 0, 2655 /* 94*/ 0, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /* 99*/ 0, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*104*/ 0, 2666 MatRealPart_SeqBAIJ, 2667 MatImaginaryPart_SeqBAIJ, 2668 0, 2669 0, 2670 /*109*/ 0, 2671 0, 2672 0, 2673 0, 2674 MatMissingDiagonal_SeqBAIJ, 2675 /*114*/ 0, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*119*/ 0, 2681 0, 2682 MatMultHermitianTranspose_SeqBAIJ, 2683 MatMultHermitianTransposeAdd_SeqBAIJ, 2684 0, 2685 /*124*/ 0, 2686 0, 2687 MatInvertBlockDiagonal_SeqBAIJ, 2688 0, 2689 0, 2690 /*129*/ 0, 2691 0, 2692 0, 2693 0, 2694 0, 2695 /*134*/ 0, 2696 0, 2697 0, 2698 0, 2699 0, 2700 /*139*/ MatSetBlockSizes_Default, 2701 0, 2702 0, 2703 MatFDColoringSetUp_SeqXAIJ, 2704 0, 2705 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2706 }; 2707 2708 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2709 { 2710 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2711 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2712 PetscErrorCode ierr; 2713 2714 PetscFunctionBegin; 2715 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2716 2717 /* allocate space for values if not already there */ 2718 if (!aij->saved_values) { 2719 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2720 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2721 } 2722 2723 /* copy values over */ 2724 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2725 PetscFunctionReturn(0); 2726 } 2727 2728 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2729 { 2730 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2731 PetscErrorCode ierr; 2732 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2733 2734 PetscFunctionBegin; 2735 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2736 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2737 2738 /* copy values over */ 2739 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2744 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2745 2746 static PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2747 { 2748 Mat_SeqBAIJ *b; 2749 PetscErrorCode ierr; 2750 PetscInt i,mbs,nbs,bs2; 2751 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2752 2753 PetscFunctionBegin; 2754 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2755 if (nz == MAT_SKIP_ALLOCATION) { 2756 skipallocation = PETSC_TRUE; 2757 nz = 0; 2758 } 2759 2760 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2761 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2762 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2763 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2764 2765 B->preallocated = PETSC_TRUE; 2766 2767 mbs = B->rmap->n/bs; 2768 nbs = B->cmap->n/bs; 2769 bs2 = bs*bs; 2770 2771 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2772 2773 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2774 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2775 if (nnz) { 2776 for (i=0; i<mbs; i++) { 2777 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2778 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2779 } 2780 } 2781 2782 b = (Mat_SeqBAIJ*)B->data; 2783 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2784 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2785 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2786 2787 if (!flg) { 2788 switch (bs) { 2789 case 1: 2790 B->ops->mult = MatMult_SeqBAIJ_1; 2791 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2792 break; 2793 case 2: 2794 B->ops->mult = MatMult_SeqBAIJ_2; 2795 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2796 break; 2797 case 3: 2798 B->ops->mult = MatMult_SeqBAIJ_3; 2799 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2800 break; 2801 case 4: 2802 B->ops->mult = MatMult_SeqBAIJ_4; 2803 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2804 break; 2805 case 5: 2806 B->ops->mult = MatMult_SeqBAIJ_5; 2807 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2808 break; 2809 case 6: 2810 B->ops->mult = MatMult_SeqBAIJ_6; 2811 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2812 break; 2813 case 7: 2814 B->ops->mult = MatMult_SeqBAIJ_7; 2815 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2816 break; 2817 case 11: 2818 B->ops->mult = MatMult_SeqBAIJ_11; 2819 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2820 break; 2821 case 15: 2822 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2823 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2824 break; 2825 default: 2826 B->ops->mult = MatMult_SeqBAIJ_N; 2827 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2828 break; 2829 } 2830 } 2831 B->ops->sor = MatSOR_SeqBAIJ; 2832 b->mbs = mbs; 2833 b->nbs = nbs; 2834 if (!skipallocation) { 2835 if (!b->imax) { 2836 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2837 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2838 2839 b->free_imax_ilen = PETSC_TRUE; 2840 } 2841 /* b->ilen will count nonzeros in each block row so far. */ 2842 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2843 if (!nnz) { 2844 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2845 else if (nz < 0) nz = 1; 2846 for (i=0; i<mbs; i++) b->imax[i] = nz; 2847 nz = nz*mbs; 2848 } else { 2849 nz = 0; 2850 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2851 } 2852 2853 /* allocate the matrix space */ 2854 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2855 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2856 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2857 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2858 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2859 2860 b->singlemalloc = PETSC_TRUE; 2861 b->i[0] = 0; 2862 for (i=1; i<mbs+1; i++) { 2863 b->i[i] = b->i[i-1] + b->imax[i-1]; 2864 } 2865 b->free_a = PETSC_TRUE; 2866 b->free_ij = PETSC_TRUE; 2867 } else { 2868 b->free_a = PETSC_FALSE; 2869 b->free_ij = PETSC_FALSE; 2870 } 2871 2872 b->bs2 = bs2; 2873 b->mbs = mbs; 2874 b->nz = 0; 2875 b->maxnz = nz; 2876 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2877 B->was_assembled = PETSC_FALSE; 2878 B->assembled = PETSC_FALSE; 2879 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2880 PetscFunctionReturn(0); 2881 } 2882 2883 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2884 { 2885 PetscInt i,m,nz,nz_max=0,*nnz; 2886 PetscScalar *values=0; 2887 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2888 PetscErrorCode ierr; 2889 2890 PetscFunctionBegin; 2891 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2892 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2893 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2894 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2895 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2896 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2897 m = B->rmap->n/bs; 2898 2899 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2900 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2901 for (i=0; i<m; i++) { 2902 nz = ii[i+1]- ii[i]; 2903 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2904 nz_max = PetscMax(nz_max, nz); 2905 nnz[i] = nz; 2906 } 2907 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2908 ierr = PetscFree(nnz);CHKERRQ(ierr); 2909 2910 values = (PetscScalar*)V; 2911 if (!values) { 2912 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2913 } 2914 for (i=0; i<m; i++) { 2915 PetscInt ncols = ii[i+1] - ii[i]; 2916 const PetscInt *icols = jj + ii[i]; 2917 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2918 if (!roworiented) { 2919 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2920 } else { 2921 PetscInt j; 2922 for (j=0; j<ncols; j++) { 2923 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2924 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2925 } 2926 } 2927 } 2928 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2929 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2930 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2931 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2932 PetscFunctionReturn(0); 2933 } 2934 2935 /*MC 2936 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2937 block sparse compressed row format. 2938 2939 Options Database Keys: 2940 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2941 2942 Level: beginner 2943 2944 .seealso: MatCreateSeqBAIJ() 2945 M*/ 2946 2947 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 2948 2949 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2950 { 2951 PetscErrorCode ierr; 2952 PetscMPIInt size; 2953 Mat_SeqBAIJ *b; 2954 2955 PetscFunctionBegin; 2956 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2957 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2958 2959 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2960 B->data = (void*)b; 2961 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2962 2963 b->row = 0; 2964 b->col = 0; 2965 b->icol = 0; 2966 b->reallocs = 0; 2967 b->saved_values = 0; 2968 2969 b->roworiented = PETSC_TRUE; 2970 b->nonew = 0; 2971 b->diag = 0; 2972 B->spptr = 0; 2973 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 2974 b->keepnonzeropattern = PETSC_FALSE; 2975 2976 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2977 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2978 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2979 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2980 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2981 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2982 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2983 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 2984 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 2985 #if defined(PETSC_HAVE_HYPRE) 2986 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 2987 #endif 2988 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2989 PetscFunctionReturn(0); 2990 } 2991 2992 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 2993 { 2994 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 2995 PetscErrorCode ierr; 2996 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2997 2998 PetscFunctionBegin; 2999 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3000 3001 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3002 c->imax = a->imax; 3003 c->ilen = a->ilen; 3004 c->free_imax_ilen = PETSC_FALSE; 3005 } else { 3006 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3007 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3008 for (i=0; i<mbs; i++) { 3009 c->imax[i] = a->imax[i]; 3010 c->ilen[i] = a->ilen[i]; 3011 } 3012 c->free_imax_ilen = PETSC_TRUE; 3013 } 3014 3015 /* allocate the matrix space */ 3016 if (mallocmatspace) { 3017 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3018 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3019 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3020 3021 c->i = a->i; 3022 c->j = a->j; 3023 c->singlemalloc = PETSC_FALSE; 3024 c->free_a = PETSC_TRUE; 3025 c->free_ij = PETSC_FALSE; 3026 c->parent = A; 3027 C->preallocated = PETSC_TRUE; 3028 C->assembled = PETSC_TRUE; 3029 3030 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3031 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3032 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3033 } else { 3034 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3035 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3036 3037 c->singlemalloc = PETSC_TRUE; 3038 c->free_a = PETSC_TRUE; 3039 c->free_ij = PETSC_TRUE; 3040 3041 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3042 if (mbs > 0) { 3043 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3044 if (cpvalues == MAT_COPY_VALUES) { 3045 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3046 } else { 3047 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3048 } 3049 } 3050 C->preallocated = PETSC_TRUE; 3051 C->assembled = PETSC_TRUE; 3052 } 3053 } 3054 3055 c->roworiented = a->roworiented; 3056 c->nonew = a->nonew; 3057 3058 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3059 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3060 3061 c->bs2 = a->bs2; 3062 c->mbs = a->mbs; 3063 c->nbs = a->nbs; 3064 3065 if (a->diag) { 3066 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3067 c->diag = a->diag; 3068 c->free_diag = PETSC_FALSE; 3069 } else { 3070 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3071 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3072 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3073 c->free_diag = PETSC_TRUE; 3074 } 3075 } else c->diag = 0; 3076 3077 c->nz = a->nz; 3078 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3079 c->solve_work = NULL; 3080 c->mult_work = NULL; 3081 c->sor_workt = NULL; 3082 c->sor_work = NULL; 3083 3084 c->compressedrow.use = a->compressedrow.use; 3085 c->compressedrow.nrows = a->compressedrow.nrows; 3086 if (a->compressedrow.use) { 3087 i = a->compressedrow.nrows; 3088 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3089 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3090 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3091 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3092 } else { 3093 c->compressedrow.use = PETSC_FALSE; 3094 c->compressedrow.i = NULL; 3095 c->compressedrow.rindex = NULL; 3096 } 3097 C->nonzerostate = A->nonzerostate; 3098 3099 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3100 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3101 PetscFunctionReturn(0); 3102 } 3103 3104 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3105 { 3106 PetscErrorCode ierr; 3107 3108 PetscFunctionBegin; 3109 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3110 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3111 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3112 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3113 PetscFunctionReturn(0); 3114 } 3115 3116 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3117 { 3118 Mat_SeqBAIJ *a; 3119 PetscErrorCode ierr; 3120 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3121 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3122 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3123 PetscInt *masked,nmask,tmp,bs2,ishift; 3124 PetscMPIInt size; 3125 int fd; 3126 PetscScalar *aa; 3127 MPI_Comm comm; 3128 3129 PetscFunctionBegin; 3130 /* force binary viewer to load .info file if it has not yet done so */ 3131 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3132 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3133 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3134 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3135 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3136 if (bs < 0) bs = 1; 3137 bs2 = bs*bs; 3138 3139 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3140 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3141 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3142 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3143 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3144 M = header[1]; N = header[2]; nz = header[3]; 3145 3146 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3147 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3148 3149 /* 3150 This code adds extra rows to make sure the number of rows is 3151 divisible by the blocksize 3152 */ 3153 mbs = M/bs; 3154 extra_rows = bs - M + bs*(mbs); 3155 if (extra_rows == bs) extra_rows = 0; 3156 else mbs++; 3157 if (extra_rows) { 3158 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3159 } 3160 3161 /* Set global sizes if not already set */ 3162 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3163 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3164 } else { /* Check if the matrix global sizes are correct */ 3165 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3166 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3167 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3168 } 3169 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3170 } 3171 3172 /* read in row lengths */ 3173 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3174 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3175 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3176 3177 /* read in column indices */ 3178 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3179 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3180 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3181 3182 /* loop over row lengths determining block row lengths */ 3183 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3184 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3185 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3186 rowcount = 0; 3187 nzcount = 0; 3188 for (i=0; i<mbs; i++) { 3189 nmask = 0; 3190 for (j=0; j<bs; j++) { 3191 kmax = rowlengths[rowcount]; 3192 for (k=0; k<kmax; k++) { 3193 tmp = jj[nzcount++]/bs; 3194 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3195 } 3196 rowcount++; 3197 } 3198 browlengths[i] += nmask; 3199 /* zero out the mask elements we set */ 3200 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3201 } 3202 3203 /* Do preallocation */ 3204 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3205 a = (Mat_SeqBAIJ*)newmat->data; 3206 3207 /* set matrix "i" values */ 3208 a->i[0] = 0; 3209 for (i=1; i<= mbs; i++) { 3210 a->i[i] = a->i[i-1] + browlengths[i-1]; 3211 a->ilen[i-1] = browlengths[i-1]; 3212 } 3213 a->nz = 0; 3214 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3215 3216 /* read in nonzero values */ 3217 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3218 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3219 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3220 3221 /* set "a" and "j" values into matrix */ 3222 nzcount = 0; jcount = 0; 3223 for (i=0; i<mbs; i++) { 3224 nzcountb = nzcount; 3225 nmask = 0; 3226 for (j=0; j<bs; j++) { 3227 kmax = rowlengths[i*bs+j]; 3228 for (k=0; k<kmax; k++) { 3229 tmp = jj[nzcount++]/bs; 3230 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3231 } 3232 } 3233 /* sort the masked values */ 3234 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3235 3236 /* set "j" values into matrix */ 3237 maskcount = 1; 3238 for (j=0; j<nmask; j++) { 3239 a->j[jcount++] = masked[j]; 3240 mask[masked[j]] = maskcount++; 3241 } 3242 /* set "a" values into matrix */ 3243 ishift = bs2*a->i[i]; 3244 for (j=0; j<bs; j++) { 3245 kmax = rowlengths[i*bs+j]; 3246 for (k=0; k<kmax; k++) { 3247 tmp = jj[nzcountb]/bs; 3248 block = mask[tmp] - 1; 3249 point = jj[nzcountb] - bs*tmp; 3250 idx = ishift + bs2*block + j + bs*point; 3251 a->a[idx] = (MatScalar)aa[nzcountb++]; 3252 } 3253 } 3254 /* zero out the mask elements we set */ 3255 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3256 } 3257 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3258 3259 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3260 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3261 ierr = PetscFree(aa);CHKERRQ(ierr); 3262 ierr = PetscFree(jj);CHKERRQ(ierr); 3263 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3264 3265 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3266 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3267 PetscFunctionReturn(0); 3268 } 3269 3270 /*@C 3271 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3272 compressed row) format. For good matrix assembly performance the 3273 user should preallocate the matrix storage by setting the parameter nz 3274 (or the array nnz). By setting these parameters accurately, performance 3275 during matrix assembly can be increased by more than a factor of 50. 3276 3277 Collective on MPI_Comm 3278 3279 Input Parameters: 3280 + comm - MPI communicator, set to PETSC_COMM_SELF 3281 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3282 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3283 . m - number of rows 3284 . n - number of columns 3285 . nz - number of nonzero blocks per block row (same for all rows) 3286 - nnz - array containing the number of nonzero blocks in the various block rows 3287 (possibly different for each block row) or NULL 3288 3289 Output Parameter: 3290 . A - the matrix 3291 3292 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3293 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3294 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3295 3296 Options Database Keys: 3297 . -mat_no_unroll - uses code that does not unroll the loops in the 3298 block calculations (much slower) 3299 . -mat_block_size - size of the blocks to use 3300 3301 Level: intermediate 3302 3303 Notes: 3304 The number of rows and columns must be divisible by blocksize. 3305 3306 If the nnz parameter is given then the nz parameter is ignored 3307 3308 A nonzero block is any block that as 1 or more nonzeros in it 3309 3310 The block AIJ format is fully compatible with standard Fortran 77 3311 storage. That is, the stored row and column indices can begin at 3312 either one (as in Fortran) or zero. See the users' manual for details. 3313 3314 Specify the preallocated storage with either nz or nnz (not both). 3315 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3316 allocation. See Users-Manual: ch_mat for details. 3317 matrices. 3318 3319 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3320 @*/ 3321 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3322 { 3323 PetscErrorCode ierr; 3324 3325 PetscFunctionBegin; 3326 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3327 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3328 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3329 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3330 PetscFunctionReturn(0); 3331 } 3332 3333 /*@C 3334 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3335 per row in the matrix. For good matrix assembly performance the 3336 user should preallocate the matrix storage by setting the parameter nz 3337 (or the array nnz). By setting these parameters accurately, performance 3338 during matrix assembly can be increased by more than a factor of 50. 3339 3340 Collective on MPI_Comm 3341 3342 Input Parameters: 3343 + B - the matrix 3344 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3345 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3346 . nz - number of block nonzeros per block row (same for all rows) 3347 - nnz - array containing the number of block nonzeros in the various block rows 3348 (possibly different for each block row) or NULL 3349 3350 Options Database Keys: 3351 . -mat_no_unroll - uses code that does not unroll the loops in the 3352 block calculations (much slower) 3353 . -mat_block_size - size of the blocks to use 3354 3355 Level: intermediate 3356 3357 Notes: 3358 If the nnz parameter is given then the nz parameter is ignored 3359 3360 You can call MatGetInfo() to get information on how effective the preallocation was; 3361 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3362 You can also run with the option -info and look for messages with the string 3363 malloc in them to see if additional memory allocation was needed. 3364 3365 The block AIJ format is fully compatible with standard Fortran 77 3366 storage. That is, the stored row and column indices can begin at 3367 either one (as in Fortran) or zero. See the users' manual for details. 3368 3369 Specify the preallocated storage with either nz or nnz (not both). 3370 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3371 allocation. See Users-Manual: ch_mat for details. 3372 3373 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3374 @*/ 3375 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3376 { 3377 PetscErrorCode ierr; 3378 3379 PetscFunctionBegin; 3380 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3381 PetscValidType(B,1); 3382 PetscValidLogicalCollectiveInt(B,bs,2); 3383 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3384 PetscFunctionReturn(0); 3385 } 3386 3387 /*@C 3388 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3389 (the default sequential PETSc format). 3390 3391 Collective on MPI_Comm 3392 3393 Input Parameters: 3394 + B - the matrix 3395 . i - the indices into j for the start of each local row (starts with zero) 3396 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3397 - v - optional values in the matrix 3398 3399 Level: developer 3400 3401 Notes: 3402 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3403 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3404 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3405 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3406 block column and the second index is over columns within a block. 3407 3408 .keywords: matrix, aij, compressed row, sparse 3409 3410 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3411 @*/ 3412 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3413 { 3414 PetscErrorCode ierr; 3415 3416 PetscFunctionBegin; 3417 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3418 PetscValidType(B,1); 3419 PetscValidLogicalCollectiveInt(B,bs,2); 3420 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3421 PetscFunctionReturn(0); 3422 } 3423 3424 3425 /*@ 3426 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3427 3428 Collective on MPI_Comm 3429 3430 Input Parameters: 3431 + comm - must be an MPI communicator of size 1 3432 . bs - size of block 3433 . m - number of rows 3434 . n - number of columns 3435 . i - row indices 3436 . j - column indices 3437 - a - matrix values 3438 3439 Output Parameter: 3440 . mat - the matrix 3441 3442 Level: advanced 3443 3444 Notes: 3445 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3446 once the matrix is destroyed 3447 3448 You cannot set new nonzero locations into this matrix, that will generate an error. 3449 3450 The i and j indices are 0 based 3451 3452 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3453 3454 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3455 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3456 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3457 with column-major ordering within blocks. 3458 3459 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3460 3461 @*/ 3462 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3463 { 3464 PetscErrorCode ierr; 3465 PetscInt ii; 3466 Mat_SeqBAIJ *baij; 3467 3468 PetscFunctionBegin; 3469 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3470 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3471 3472 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3473 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3474 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3475 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3476 baij = (Mat_SeqBAIJ*)(*mat)->data; 3477 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3478 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3479 3480 baij->i = i; 3481 baij->j = j; 3482 baij->a = a; 3483 3484 baij->singlemalloc = PETSC_FALSE; 3485 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3486 baij->free_a = PETSC_FALSE; 3487 baij->free_ij = PETSC_FALSE; 3488 3489 for (ii=0; ii<m; ii++) { 3490 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3491 #if defined(PETSC_USE_DEBUG) 3492 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3493 #endif 3494 } 3495 #if defined(PETSC_USE_DEBUG) 3496 for (ii=0; ii<baij->i[m]; ii++) { 3497 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3498 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3499 } 3500 #endif 3501 3502 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3503 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3504 PetscFunctionReturn(0); 3505 } 3506 3507 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3508 { 3509 PetscErrorCode ierr; 3510 PetscMPIInt size; 3511 3512 PetscFunctionBegin; 3513 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3514 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3515 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3516 } else { 3517 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3518 } 3519 PetscFunctionReturn(0); 3520 } 3521