1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "src/mat/impls/maij/maij.h" 21 #include "src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27 { 28 PetscErrorCode ierr; 29 PetscTruth ismpimaij,isseqmaij; 30 31 PetscFunctionBegin; 32 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34 if (ismpimaij) { 35 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36 37 *B = b->A; 38 } else if (isseqmaij) { 39 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40 41 *B = b->AIJ; 42 } else { 43 *B = A; 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMAIJRedimension" 50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51 { 52 PetscErrorCode ierr; 53 Mat Aij; 54 55 PetscFunctionBegin; 56 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatDestroy_SeqMAIJ" 63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 64 { 65 PetscErrorCode ierr; 66 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 67 68 PetscFunctionBegin; 69 if (b->AIJ) { 70 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 71 } 72 ierr = PetscFree(b);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatView_SeqMAIJ" 78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 79 { 80 PetscErrorCode ierr; 81 Mat B; 82 83 PetscFunctionBegin; 84 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 85 ierr = MatView(B,viewer);CHKERRQ(ierr); 86 ierr = MatDestroy(B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatView_MPIMAIJ" 92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 93 { 94 PetscErrorCode ierr; 95 Mat B; 96 97 PetscFunctionBegin; 98 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 99 ierr = MatView(B,viewer);CHKERRQ(ierr); 100 ierr = MatDestroy(B);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatDestroy_MPIMAIJ" 106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 107 { 108 PetscErrorCode ierr; 109 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 110 111 PetscFunctionBegin; 112 if (b->AIJ) { 113 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 114 } 115 if (b->OAIJ) { 116 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117 } 118 if (b->A) { 119 ierr = MatDestroy(b->A);CHKERRQ(ierr); 120 } 121 if (b->ctx) { 122 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123 } 124 if (b->w) { 125 ierr = VecDestroy(b->w);CHKERRQ(ierr); 126 } 127 ierr = PetscFree(b);CHKERRQ(ierr); 128 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 /*MC 133 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 134 multicomponent problems, interpolating or restricting each component the same way independently. 135 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 136 137 Operations provided: 138 . MatMult 139 . MatMultTranspose 140 . MatMultAdd 141 . MatMultTransposeAdd 142 143 Level: advanced 144 145 .seealso: MatCreateSeqDense 146 M*/ 147 148 EXTERN_C_BEGIN 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatCreate_MAIJ" 151 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 152 { 153 PetscErrorCode ierr; 154 Mat_MPIMAIJ *b; 155 PetscMPIInt size; 156 157 PetscFunctionBegin; 158 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 159 A->data = (void*)b; 160 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 161 A->mapping = 0; 162 163 b->AIJ = 0; 164 b->dof = 0; 165 b->OAIJ = 0; 166 b->ctx = 0; 167 b->w = 0; 168 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 169 if (size == 1){ 170 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 171 } else { 172 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 173 } 174 PetscFunctionReturn(0); 175 } 176 EXTERN_C_END 177 178 /* --------------------------------------------------------------------------------------*/ 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatMult_SeqMAIJ_2" 181 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 182 { 183 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 185 PetscScalar *x,*y,*v,sum1, sum2; 186 PetscErrorCode ierr; 187 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 188 PetscInt n,i,jrow,j; 189 190 PetscFunctionBegin; 191 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 192 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 193 idx = a->j; 194 v = a->a; 195 ii = a->i; 196 197 for (i=0; i<m; i++) { 198 jrow = ii[i]; 199 n = ii[i+1] - jrow; 200 sum1 = 0.0; 201 sum2 = 0.0; 202 nonzerorow += (n>0); 203 for (j=0; j<n; j++) { 204 sum1 += v[jrow]*x[2*idx[jrow]]; 205 sum2 += v[jrow]*x[2*idx[jrow]+1]; 206 jrow++; 207 } 208 y[2*i] = sum1; 209 y[2*i+1] = sum2; 210 } 211 212 ierr = PetscLogFlops(4*a->nz - 2*nonzerorow);CHKERRQ(ierr); 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 220 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 221 { 222 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 223 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 224 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 225 PetscErrorCode ierr; 226 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 227 228 PetscFunctionBegin; 229 ierr = VecSet(yy,zero);CHKERRQ(ierr); 230 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 231 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 232 233 for (i=0; i<m; i++) { 234 idx = a->j + a->i[i] ; 235 v = a->a + a->i[i] ; 236 n = a->i[i+1] - a->i[i]; 237 alpha1 = x[2*i]; 238 alpha2 = x[2*i+1]; 239 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 240 } 241 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 242 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 249 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 250 { 251 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 252 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 253 PetscScalar *x,*y,*v,sum1, sum2; 254 PetscErrorCode ierr; 255 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 256 PetscInt n,i,jrow,j; 257 258 PetscFunctionBegin; 259 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 260 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 261 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 262 idx = a->j; 263 v = a->a; 264 ii = a->i; 265 266 for (i=0; i<m; i++) { 267 jrow = ii[i]; 268 n = ii[i+1] - jrow; 269 sum1 = 0.0; 270 sum2 = 0.0; 271 for (j=0; j<n; j++) { 272 sum1 += v[jrow]*x[2*idx[jrow]]; 273 sum2 += v[jrow]*x[2*idx[jrow]+1]; 274 jrow++; 275 } 276 y[2*i] += sum1; 277 y[2*i+1] += sum2; 278 } 279 280 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 281 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 282 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 287 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 288 { 289 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 290 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 291 PetscScalar *x,*y,*v,alpha1,alpha2; 292 PetscErrorCode ierr; 293 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 294 295 PetscFunctionBegin; 296 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 297 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 298 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 299 300 for (i=0; i<m; i++) { 301 idx = a->j + a->i[i] ; 302 v = a->a + a->i[i] ; 303 n = a->i[i+1] - a->i[i]; 304 alpha1 = x[2*i]; 305 alpha2 = x[2*i+1]; 306 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 307 } 308 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 309 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 310 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 /* --------------------------------------------------------------------------------------*/ 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatMult_SeqMAIJ_3" 316 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 317 { 318 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 319 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 320 PetscScalar *x,*y,*v,sum1, sum2, sum3; 321 PetscErrorCode ierr; 322 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 323 PetscInt n,i,jrow,j; 324 325 PetscFunctionBegin; 326 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 327 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 328 idx = a->j; 329 v = a->a; 330 ii = a->i; 331 332 for (i=0; i<m; i++) { 333 jrow = ii[i]; 334 n = ii[i+1] - jrow; 335 sum1 = 0.0; 336 sum2 = 0.0; 337 sum3 = 0.0; 338 nonzerorow += (n>0); 339 for (j=0; j<n; j++) { 340 sum1 += v[jrow]*x[3*idx[jrow]]; 341 sum2 += v[jrow]*x[3*idx[jrow]+1]; 342 sum3 += v[jrow]*x[3*idx[jrow]+2]; 343 jrow++; 344 } 345 y[3*i] = sum1; 346 y[3*i+1] = sum2; 347 y[3*i+2] = sum3; 348 } 349 350 ierr = PetscLogFlops(6*a->nz - 3*nonzerorow);CHKERRQ(ierr); 351 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 353 PetscFunctionReturn(0); 354 } 355 356 #undef __FUNCT__ 357 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 358 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 359 { 360 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 361 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 362 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 363 PetscErrorCode ierr; 364 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 365 366 PetscFunctionBegin; 367 ierr = VecSet(yy,zero);CHKERRQ(ierr); 368 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 369 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 370 371 for (i=0; i<m; i++) { 372 idx = a->j + a->i[i]; 373 v = a->a + a->i[i]; 374 n = a->i[i+1] - a->i[i]; 375 alpha1 = x[3*i]; 376 alpha2 = x[3*i+1]; 377 alpha3 = x[3*i+2]; 378 while (n-->0) { 379 y[3*(*idx)] += alpha1*(*v); 380 y[3*(*idx)+1] += alpha2*(*v); 381 y[3*(*idx)+2] += alpha3*(*v); 382 idx++; v++; 383 } 384 } 385 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 386 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 387 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 393 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 394 { 395 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 396 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 397 PetscScalar *x,*y,*v,sum1, sum2, sum3; 398 PetscErrorCode ierr; 399 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 400 PetscInt n,i,jrow,j; 401 402 PetscFunctionBegin; 403 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 404 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 405 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 406 idx = a->j; 407 v = a->a; 408 ii = a->i; 409 410 for (i=0; i<m; i++) { 411 jrow = ii[i]; 412 n = ii[i+1] - jrow; 413 sum1 = 0.0; 414 sum2 = 0.0; 415 sum3 = 0.0; 416 for (j=0; j<n; j++) { 417 sum1 += v[jrow]*x[3*idx[jrow]]; 418 sum2 += v[jrow]*x[3*idx[jrow]+1]; 419 sum3 += v[jrow]*x[3*idx[jrow]+2]; 420 jrow++; 421 } 422 y[3*i] += sum1; 423 y[3*i+1] += sum2; 424 y[3*i+2] += sum3; 425 } 426 427 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 429 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 434 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 435 { 436 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 437 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 438 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 439 PetscErrorCode ierr; 440 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 441 442 PetscFunctionBegin; 443 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 444 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 445 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 446 for (i=0; i<m; i++) { 447 idx = a->j + a->i[i] ; 448 v = a->a + a->i[i] ; 449 n = a->i[i+1] - a->i[i]; 450 alpha1 = x[3*i]; 451 alpha2 = x[3*i+1]; 452 alpha3 = x[3*i+2]; 453 while (n-->0) { 454 y[3*(*idx)] += alpha1*(*v); 455 y[3*(*idx)+1] += alpha2*(*v); 456 y[3*(*idx)+2] += alpha3*(*v); 457 idx++; v++; 458 } 459 } 460 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 461 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 462 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 /* ------------------------------------------------------------------------------*/ 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatMult_SeqMAIJ_4" 469 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 470 { 471 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 472 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 473 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 474 PetscErrorCode ierr; 475 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 476 PetscInt n,i,jrow,j; 477 478 PetscFunctionBegin; 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 481 idx = a->j; 482 v = a->a; 483 ii = a->i; 484 485 for (i=0; i<m; i++) { 486 jrow = ii[i]; 487 n = ii[i+1] - jrow; 488 sum1 = 0.0; 489 sum2 = 0.0; 490 sum3 = 0.0; 491 sum4 = 0.0; 492 nonzerorow += (n>0); 493 for (j=0; j<n; j++) { 494 sum1 += v[jrow]*x[4*idx[jrow]]; 495 sum2 += v[jrow]*x[4*idx[jrow]+1]; 496 sum3 += v[jrow]*x[4*idx[jrow]+2]; 497 sum4 += v[jrow]*x[4*idx[jrow]+3]; 498 jrow++; 499 } 500 y[4*i] = sum1; 501 y[4*i+1] = sum2; 502 y[4*i+2] = sum3; 503 y[4*i+3] = sum4; 504 } 505 506 ierr = PetscLogFlops(8*a->nz - 4*nonzerorow);CHKERRQ(ierr); 507 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 508 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 514 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 515 { 516 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 517 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 518 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 519 PetscErrorCode ierr; 520 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 521 522 PetscFunctionBegin; 523 ierr = VecSet(yy,zero);CHKERRQ(ierr); 524 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 525 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 526 for (i=0; i<m; i++) { 527 idx = a->j + a->i[i] ; 528 v = a->a + a->i[i] ; 529 n = a->i[i+1] - a->i[i]; 530 alpha1 = x[4*i]; 531 alpha2 = x[4*i+1]; 532 alpha3 = x[4*i+2]; 533 alpha4 = x[4*i+3]; 534 while (n-->0) { 535 y[4*(*idx)] += alpha1*(*v); 536 y[4*(*idx)+1] += alpha2*(*v); 537 y[4*(*idx)+2] += alpha3*(*v); 538 y[4*(*idx)+3] += alpha4*(*v); 539 idx++; v++; 540 } 541 } 542 ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 543 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 544 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 550 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 551 { 552 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 553 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 554 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 555 PetscErrorCode ierr; 556 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 557 PetscInt n,i,jrow,j; 558 559 PetscFunctionBegin; 560 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 561 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 563 idx = a->j; 564 v = a->a; 565 ii = a->i; 566 567 for (i=0; i<m; i++) { 568 jrow = ii[i]; 569 n = ii[i+1] - jrow; 570 sum1 = 0.0; 571 sum2 = 0.0; 572 sum3 = 0.0; 573 sum4 = 0.0; 574 for (j=0; j<n; j++) { 575 sum1 += v[jrow]*x[4*idx[jrow]]; 576 sum2 += v[jrow]*x[4*idx[jrow]+1]; 577 sum3 += v[jrow]*x[4*idx[jrow]+2]; 578 sum4 += v[jrow]*x[4*idx[jrow]+3]; 579 jrow++; 580 } 581 y[4*i] += sum1; 582 y[4*i+1] += sum2; 583 y[4*i+2] += sum3; 584 y[4*i+3] += sum4; 585 } 586 587 ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 588 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 589 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 594 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 595 { 596 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 597 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 598 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 599 PetscErrorCode ierr; 600 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 601 602 PetscFunctionBegin; 603 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 604 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 605 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 606 607 for (i=0; i<m; i++) { 608 idx = a->j + a->i[i] ; 609 v = a->a + a->i[i] ; 610 n = a->i[i+1] - a->i[i]; 611 alpha1 = x[4*i]; 612 alpha2 = x[4*i+1]; 613 alpha3 = x[4*i+2]; 614 alpha4 = x[4*i+3]; 615 while (n-->0) { 616 y[4*(*idx)] += alpha1*(*v); 617 y[4*(*idx)+1] += alpha2*(*v); 618 y[4*(*idx)+2] += alpha3*(*v); 619 y[4*(*idx)+3] += alpha4*(*v); 620 idx++; v++; 621 } 622 } 623 ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 624 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 625 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 /* ------------------------------------------------------------------------------*/ 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatMult_SeqMAIJ_5" 632 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 633 { 634 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 635 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 636 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 637 PetscErrorCode ierr; 638 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 639 PetscInt n,i,jrow,j; 640 641 PetscFunctionBegin; 642 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 643 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 644 idx = a->j; 645 v = a->a; 646 ii = a->i; 647 648 for (i=0; i<m; i++) { 649 jrow = ii[i]; 650 n = ii[i+1] - jrow; 651 sum1 = 0.0; 652 sum2 = 0.0; 653 sum3 = 0.0; 654 sum4 = 0.0; 655 sum5 = 0.0; 656 nonzerorow += (n>0); 657 for (j=0; j<n; j++) { 658 sum1 += v[jrow]*x[5*idx[jrow]]; 659 sum2 += v[jrow]*x[5*idx[jrow]+1]; 660 sum3 += v[jrow]*x[5*idx[jrow]+2]; 661 sum4 += v[jrow]*x[5*idx[jrow]+3]; 662 sum5 += v[jrow]*x[5*idx[jrow]+4]; 663 jrow++; 664 } 665 y[5*i] = sum1; 666 y[5*i+1] = sum2; 667 y[5*i+2] = sum3; 668 y[5*i+3] = sum4; 669 y[5*i+4] = sum5; 670 } 671 672 ierr = PetscLogFlops(10*a->nz - 5*nonzerorow);CHKERRQ(ierr); 673 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 674 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 675 PetscFunctionReturn(0); 676 } 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 680 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 681 { 682 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 683 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 684 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 685 PetscErrorCode ierr; 686 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 687 688 PetscFunctionBegin; 689 ierr = VecSet(yy,zero);CHKERRQ(ierr); 690 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 691 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 692 693 for (i=0; i<m; i++) { 694 idx = a->j + a->i[i] ; 695 v = a->a + a->i[i] ; 696 n = a->i[i+1] - a->i[i]; 697 alpha1 = x[5*i]; 698 alpha2 = x[5*i+1]; 699 alpha3 = x[5*i+2]; 700 alpha4 = x[5*i+3]; 701 alpha5 = x[5*i+4]; 702 while (n-->0) { 703 y[5*(*idx)] += alpha1*(*v); 704 y[5*(*idx)+1] += alpha2*(*v); 705 y[5*(*idx)+2] += alpha3*(*v); 706 y[5*(*idx)+3] += alpha4*(*v); 707 y[5*(*idx)+4] += alpha5*(*v); 708 idx++; v++; 709 } 710 } 711 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 712 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 713 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } 716 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 719 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 720 { 721 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 722 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 723 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 724 PetscErrorCode ierr; 725 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 726 PetscInt n,i,jrow,j; 727 728 PetscFunctionBegin; 729 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 730 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 731 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 732 idx = a->j; 733 v = a->a; 734 ii = a->i; 735 736 for (i=0; i<m; i++) { 737 jrow = ii[i]; 738 n = ii[i+1] - jrow; 739 sum1 = 0.0; 740 sum2 = 0.0; 741 sum3 = 0.0; 742 sum4 = 0.0; 743 sum5 = 0.0; 744 for (j=0; j<n; j++) { 745 sum1 += v[jrow]*x[5*idx[jrow]]; 746 sum2 += v[jrow]*x[5*idx[jrow]+1]; 747 sum3 += v[jrow]*x[5*idx[jrow]+2]; 748 sum4 += v[jrow]*x[5*idx[jrow]+3]; 749 sum5 += v[jrow]*x[5*idx[jrow]+4]; 750 jrow++; 751 } 752 y[5*i] += sum1; 753 y[5*i+1] += sum2; 754 y[5*i+2] += sum3; 755 y[5*i+3] += sum4; 756 y[5*i+4] += sum5; 757 } 758 759 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 760 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 761 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 767 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 768 { 769 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 770 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 771 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 772 PetscErrorCode ierr; 773 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 774 775 PetscFunctionBegin; 776 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 777 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 778 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 779 780 for (i=0; i<m; i++) { 781 idx = a->j + a->i[i] ; 782 v = a->a + a->i[i] ; 783 n = a->i[i+1] - a->i[i]; 784 alpha1 = x[5*i]; 785 alpha2 = x[5*i+1]; 786 alpha3 = x[5*i+2]; 787 alpha4 = x[5*i+3]; 788 alpha5 = x[5*i+4]; 789 while (n-->0) { 790 y[5*(*idx)] += alpha1*(*v); 791 y[5*(*idx)+1] += alpha2*(*v); 792 y[5*(*idx)+2] += alpha3*(*v); 793 y[5*(*idx)+3] += alpha4*(*v); 794 y[5*(*idx)+4] += alpha5*(*v); 795 idx++; v++; 796 } 797 } 798 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } 803 804 /* ------------------------------------------------------------------------------*/ 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatMult_SeqMAIJ_6" 807 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 808 { 809 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 810 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 811 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 812 PetscErrorCode ierr; 813 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 814 PetscInt n,i,jrow,j; 815 816 PetscFunctionBegin; 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 819 idx = a->j; 820 v = a->a; 821 ii = a->i; 822 823 for (i=0; i<m; i++) { 824 jrow = ii[i]; 825 n = ii[i+1] - jrow; 826 sum1 = 0.0; 827 sum2 = 0.0; 828 sum3 = 0.0; 829 sum4 = 0.0; 830 sum5 = 0.0; 831 sum6 = 0.0; 832 nonzerorow += (n>0); 833 for (j=0; j<n; j++) { 834 sum1 += v[jrow]*x[6*idx[jrow]]; 835 sum2 += v[jrow]*x[6*idx[jrow]+1]; 836 sum3 += v[jrow]*x[6*idx[jrow]+2]; 837 sum4 += v[jrow]*x[6*idx[jrow]+3]; 838 sum5 += v[jrow]*x[6*idx[jrow]+4]; 839 sum6 += v[jrow]*x[6*idx[jrow]+5]; 840 jrow++; 841 } 842 y[6*i] = sum1; 843 y[6*i+1] = sum2; 844 y[6*i+2] = sum3; 845 y[6*i+3] = sum4; 846 y[6*i+4] = sum5; 847 y[6*i+5] = sum6; 848 } 849 850 ierr = PetscLogFlops(12*a->nz - 6*nonzerorow);CHKERRQ(ierr); 851 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 852 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 853 PetscFunctionReturn(0); 854 } 855 856 #undef __FUNCT__ 857 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 858 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 859 { 860 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 861 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 862 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 863 PetscErrorCode ierr; 864 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 865 866 PetscFunctionBegin; 867 ierr = VecSet(yy,zero);CHKERRQ(ierr); 868 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 869 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 870 871 for (i=0; i<m; i++) { 872 idx = a->j + a->i[i] ; 873 v = a->a + a->i[i] ; 874 n = a->i[i+1] - a->i[i]; 875 alpha1 = x[6*i]; 876 alpha2 = x[6*i+1]; 877 alpha3 = x[6*i+2]; 878 alpha4 = x[6*i+3]; 879 alpha5 = x[6*i+4]; 880 alpha6 = x[6*i+5]; 881 while (n-->0) { 882 y[6*(*idx)] += alpha1*(*v); 883 y[6*(*idx)+1] += alpha2*(*v); 884 y[6*(*idx)+2] += alpha3*(*v); 885 y[6*(*idx)+3] += alpha4*(*v); 886 y[6*(*idx)+4] += alpha5*(*v); 887 y[6*(*idx)+5] += alpha6*(*v); 888 idx++; v++; 889 } 890 } 891 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 892 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 893 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 899 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 900 { 901 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 902 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 903 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 904 PetscErrorCode ierr; 905 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 906 PetscInt n,i,jrow,j; 907 908 PetscFunctionBegin; 909 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 910 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 911 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 912 idx = a->j; 913 v = a->a; 914 ii = a->i; 915 916 for (i=0; i<m; i++) { 917 jrow = ii[i]; 918 n = ii[i+1] - jrow; 919 sum1 = 0.0; 920 sum2 = 0.0; 921 sum3 = 0.0; 922 sum4 = 0.0; 923 sum5 = 0.0; 924 sum6 = 0.0; 925 for (j=0; j<n; j++) { 926 sum1 += v[jrow]*x[6*idx[jrow]]; 927 sum2 += v[jrow]*x[6*idx[jrow]+1]; 928 sum3 += v[jrow]*x[6*idx[jrow]+2]; 929 sum4 += v[jrow]*x[6*idx[jrow]+3]; 930 sum5 += v[jrow]*x[6*idx[jrow]+4]; 931 sum6 += v[jrow]*x[6*idx[jrow]+5]; 932 jrow++; 933 } 934 y[6*i] += sum1; 935 y[6*i+1] += sum2; 936 y[6*i+2] += sum3; 937 y[6*i+3] += sum4; 938 y[6*i+4] += sum5; 939 y[6*i+5] += sum6; 940 } 941 942 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 943 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 944 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 945 PetscFunctionReturn(0); 946 } 947 948 #undef __FUNCT__ 949 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 950 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 951 { 952 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 953 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 954 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 955 PetscErrorCode ierr; 956 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 957 958 PetscFunctionBegin; 959 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 960 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 961 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 962 963 for (i=0; i<m; i++) { 964 idx = a->j + a->i[i] ; 965 v = a->a + a->i[i] ; 966 n = a->i[i+1] - a->i[i]; 967 alpha1 = x[6*i]; 968 alpha2 = x[6*i+1]; 969 alpha3 = x[6*i+2]; 970 alpha4 = x[6*i+3]; 971 alpha5 = x[6*i+4]; 972 alpha6 = x[6*i+5]; 973 while (n-->0) { 974 y[6*(*idx)] += alpha1*(*v); 975 y[6*(*idx)+1] += alpha2*(*v); 976 y[6*(*idx)+2] += alpha3*(*v); 977 y[6*(*idx)+3] += alpha4*(*v); 978 y[6*(*idx)+4] += alpha5*(*v); 979 y[6*(*idx)+5] += alpha6*(*v); 980 idx++; v++; 981 } 982 } 983 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 984 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 985 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 /* ------------------------------------------------------------------------------*/ 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatMult_SeqMAIJ_7" 992 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 993 { 994 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 995 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 996 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 997 PetscErrorCode ierr; 998 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 999 PetscInt n,i,jrow,j; 1000 1001 PetscFunctionBegin; 1002 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1003 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1004 idx = a->j; 1005 v = a->a; 1006 ii = a->i; 1007 1008 for (i=0; i<m; i++) { 1009 jrow = ii[i]; 1010 n = ii[i+1] - jrow; 1011 sum1 = 0.0; 1012 sum2 = 0.0; 1013 sum3 = 0.0; 1014 sum4 = 0.0; 1015 sum5 = 0.0; 1016 sum6 = 0.0; 1017 sum7 = 0.0; 1018 nonzerorow += (n>0); 1019 for (j=0; j<n; j++) { 1020 sum1 += v[jrow]*x[7*idx[jrow]]; 1021 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1022 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1023 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1024 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1025 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1026 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1027 jrow++; 1028 } 1029 y[7*i] = sum1; 1030 y[7*i+1] = sum2; 1031 y[7*i+2] = sum3; 1032 y[7*i+3] = sum4; 1033 y[7*i+4] = sum5; 1034 y[7*i+5] = sum6; 1035 y[7*i+6] = sum7; 1036 } 1037 1038 ierr = PetscLogFlops(14*a->nz - 7*nonzerorow);CHKERRQ(ierr); 1039 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1040 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1046 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1047 { 1048 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1049 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1050 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1051 PetscErrorCode ierr; 1052 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1053 1054 PetscFunctionBegin; 1055 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1056 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1057 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1058 1059 for (i=0; i<m; i++) { 1060 idx = a->j + a->i[i] ; 1061 v = a->a + a->i[i] ; 1062 n = a->i[i+1] - a->i[i]; 1063 alpha1 = x[7*i]; 1064 alpha2 = x[7*i+1]; 1065 alpha3 = x[7*i+2]; 1066 alpha4 = x[7*i+3]; 1067 alpha5 = x[7*i+4]; 1068 alpha6 = x[7*i+5]; 1069 alpha7 = x[7*i+6]; 1070 while (n-->0) { 1071 y[7*(*idx)] += alpha1*(*v); 1072 y[7*(*idx)+1] += alpha2*(*v); 1073 y[7*(*idx)+2] += alpha3*(*v); 1074 y[7*(*idx)+3] += alpha4*(*v); 1075 y[7*(*idx)+4] += alpha5*(*v); 1076 y[7*(*idx)+5] += alpha6*(*v); 1077 y[7*(*idx)+6] += alpha7*(*v); 1078 idx++; v++; 1079 } 1080 } 1081 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1082 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1083 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1089 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1090 { 1091 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1092 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1093 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1094 PetscErrorCode ierr; 1095 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1096 PetscInt n,i,jrow,j; 1097 1098 PetscFunctionBegin; 1099 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1100 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1101 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1102 idx = a->j; 1103 v = a->a; 1104 ii = a->i; 1105 1106 for (i=0; i<m; i++) { 1107 jrow = ii[i]; 1108 n = ii[i+1] - jrow; 1109 sum1 = 0.0; 1110 sum2 = 0.0; 1111 sum3 = 0.0; 1112 sum4 = 0.0; 1113 sum5 = 0.0; 1114 sum6 = 0.0; 1115 sum7 = 0.0; 1116 for (j=0; j<n; j++) { 1117 sum1 += v[jrow]*x[7*idx[jrow]]; 1118 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1119 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1120 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1121 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1122 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1123 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1124 jrow++; 1125 } 1126 y[7*i] += sum1; 1127 y[7*i+1] += sum2; 1128 y[7*i+2] += sum3; 1129 y[7*i+3] += sum4; 1130 y[7*i+4] += sum5; 1131 y[7*i+5] += sum6; 1132 y[7*i+6] += sum7; 1133 } 1134 1135 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1136 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1137 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1143 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1144 { 1145 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1147 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1148 PetscErrorCode ierr; 1149 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1150 1151 PetscFunctionBegin; 1152 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1153 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1154 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1155 for (i=0; i<m; i++) { 1156 idx = a->j + a->i[i] ; 1157 v = a->a + a->i[i] ; 1158 n = a->i[i+1] - a->i[i]; 1159 alpha1 = x[7*i]; 1160 alpha2 = x[7*i+1]; 1161 alpha3 = x[7*i+2]; 1162 alpha4 = x[7*i+3]; 1163 alpha5 = x[7*i+4]; 1164 alpha6 = x[7*i+5]; 1165 alpha7 = x[7*i+6]; 1166 while (n-->0) { 1167 y[7*(*idx)] += alpha1*(*v); 1168 y[7*(*idx)+1] += alpha2*(*v); 1169 y[7*(*idx)+2] += alpha3*(*v); 1170 y[7*(*idx)+3] += alpha4*(*v); 1171 y[7*(*idx)+4] += alpha5*(*v); 1172 y[7*(*idx)+5] += alpha6*(*v); 1173 y[7*(*idx)+6] += alpha7*(*v); 1174 idx++; v++; 1175 } 1176 } 1177 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1178 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1179 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1185 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1186 { 1187 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1188 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1189 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1190 PetscErrorCode ierr; 1191 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 1192 PetscInt n,i,jrow,j; 1193 1194 PetscFunctionBegin; 1195 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1196 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1197 idx = a->j; 1198 v = a->a; 1199 ii = a->i; 1200 1201 for (i=0; i<m; i++) { 1202 jrow = ii[i]; 1203 n = ii[i+1] - jrow; 1204 sum1 = 0.0; 1205 sum2 = 0.0; 1206 sum3 = 0.0; 1207 sum4 = 0.0; 1208 sum5 = 0.0; 1209 sum6 = 0.0; 1210 sum7 = 0.0; 1211 sum8 = 0.0; 1212 nonzerorow += (n>0); 1213 for (j=0; j<n; j++) { 1214 sum1 += v[jrow]*x[8*idx[jrow]]; 1215 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1216 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1217 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1218 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1219 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1220 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1221 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1222 jrow++; 1223 } 1224 y[8*i] = sum1; 1225 y[8*i+1] = sum2; 1226 y[8*i+2] = sum3; 1227 y[8*i+3] = sum4; 1228 y[8*i+4] = sum5; 1229 y[8*i+5] = sum6; 1230 y[8*i+6] = sum7; 1231 y[8*i+7] = sum8; 1232 } 1233 1234 ierr = PetscLogFlops(16*a->nz - 8*nonzerorow);CHKERRQ(ierr); 1235 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1236 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1242 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1243 { 1244 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1245 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1246 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1247 PetscErrorCode ierr; 1248 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1249 1250 PetscFunctionBegin; 1251 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1252 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1253 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1254 1255 for (i=0; i<m; i++) { 1256 idx = a->j + a->i[i] ; 1257 v = a->a + a->i[i] ; 1258 n = a->i[i+1] - a->i[i]; 1259 alpha1 = x[8*i]; 1260 alpha2 = x[8*i+1]; 1261 alpha3 = x[8*i+2]; 1262 alpha4 = x[8*i+3]; 1263 alpha5 = x[8*i+4]; 1264 alpha6 = x[8*i+5]; 1265 alpha7 = x[8*i+6]; 1266 alpha8 = x[8*i+7]; 1267 while (n-->0) { 1268 y[8*(*idx)] += alpha1*(*v); 1269 y[8*(*idx)+1] += alpha2*(*v); 1270 y[8*(*idx)+2] += alpha3*(*v); 1271 y[8*(*idx)+3] += alpha4*(*v); 1272 y[8*(*idx)+4] += alpha5*(*v); 1273 y[8*(*idx)+5] += alpha6*(*v); 1274 y[8*(*idx)+6] += alpha7*(*v); 1275 y[8*(*idx)+7] += alpha8*(*v); 1276 idx++; v++; 1277 } 1278 } 1279 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1280 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1281 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1287 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1288 { 1289 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1290 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1291 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1292 PetscErrorCode ierr; 1293 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1294 PetscInt n,i,jrow,j; 1295 1296 PetscFunctionBegin; 1297 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1298 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1299 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1300 idx = a->j; 1301 v = a->a; 1302 ii = a->i; 1303 1304 for (i=0; i<m; i++) { 1305 jrow = ii[i]; 1306 n = ii[i+1] - jrow; 1307 sum1 = 0.0; 1308 sum2 = 0.0; 1309 sum3 = 0.0; 1310 sum4 = 0.0; 1311 sum5 = 0.0; 1312 sum6 = 0.0; 1313 sum7 = 0.0; 1314 sum8 = 0.0; 1315 for (j=0; j<n; j++) { 1316 sum1 += v[jrow]*x[8*idx[jrow]]; 1317 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1318 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1319 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1320 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1321 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1322 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1323 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1324 jrow++; 1325 } 1326 y[8*i] += sum1; 1327 y[8*i+1] += sum2; 1328 y[8*i+2] += sum3; 1329 y[8*i+3] += sum4; 1330 y[8*i+4] += sum5; 1331 y[8*i+5] += sum6; 1332 y[8*i+6] += sum7; 1333 y[8*i+7] += sum8; 1334 } 1335 1336 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1337 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1338 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1344 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1345 { 1346 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1347 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1348 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1349 PetscErrorCode ierr; 1350 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1351 1352 PetscFunctionBegin; 1353 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1354 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1355 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1356 for (i=0; i<m; i++) { 1357 idx = a->j + a->i[i] ; 1358 v = a->a + a->i[i] ; 1359 n = a->i[i+1] - a->i[i]; 1360 alpha1 = x[8*i]; 1361 alpha2 = x[8*i+1]; 1362 alpha3 = x[8*i+2]; 1363 alpha4 = x[8*i+3]; 1364 alpha5 = x[8*i+4]; 1365 alpha6 = x[8*i+5]; 1366 alpha7 = x[8*i+6]; 1367 alpha8 = x[8*i+7]; 1368 while (n-->0) { 1369 y[8*(*idx)] += alpha1*(*v); 1370 y[8*(*idx)+1] += alpha2*(*v); 1371 y[8*(*idx)+2] += alpha3*(*v); 1372 y[8*(*idx)+3] += alpha4*(*v); 1373 y[8*(*idx)+4] += alpha5*(*v); 1374 y[8*(*idx)+5] += alpha6*(*v); 1375 y[8*(*idx)+6] += alpha7*(*v); 1376 y[8*(*idx)+7] += alpha8*(*v); 1377 idx++; v++; 1378 } 1379 } 1380 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1381 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1382 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 1386 /* ------------------------------------------------------------------------------*/ 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1389 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1390 { 1391 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1392 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1393 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1394 PetscErrorCode ierr; 1395 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 1396 PetscInt n,i,jrow,j; 1397 1398 PetscFunctionBegin; 1399 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1400 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1401 idx = a->j; 1402 v = a->a; 1403 ii = a->i; 1404 1405 for (i=0; i<m; i++) { 1406 jrow = ii[i]; 1407 n = ii[i+1] - jrow; 1408 sum1 = 0.0; 1409 sum2 = 0.0; 1410 sum3 = 0.0; 1411 sum4 = 0.0; 1412 sum5 = 0.0; 1413 sum6 = 0.0; 1414 sum7 = 0.0; 1415 sum8 = 0.0; 1416 sum9 = 0.0; 1417 nonzerorow += (n>0); 1418 for (j=0; j<n; j++) { 1419 sum1 += v[jrow]*x[9*idx[jrow]]; 1420 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1421 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1422 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1423 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1424 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1425 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1426 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1427 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1428 jrow++; 1429 } 1430 y[9*i] = sum1; 1431 y[9*i+1] = sum2; 1432 y[9*i+2] = sum3; 1433 y[9*i+3] = sum4; 1434 y[9*i+4] = sum5; 1435 y[9*i+5] = sum6; 1436 y[9*i+6] = sum7; 1437 y[9*i+7] = sum8; 1438 y[9*i+8] = sum9; 1439 } 1440 1441 ierr = PetscLogFlops(18*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1442 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1443 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 /* ------------------------------------------------------------------------------*/ 1448 1449 #undef __FUNCT__ 1450 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1451 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1452 { 1453 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1454 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1455 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1456 PetscErrorCode ierr; 1457 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1458 1459 PetscFunctionBegin; 1460 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1461 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1462 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1463 1464 for (i=0; i<m; i++) { 1465 idx = a->j + a->i[i] ; 1466 v = a->a + a->i[i] ; 1467 n = a->i[i+1] - a->i[i]; 1468 alpha1 = x[9*i]; 1469 alpha2 = x[9*i+1]; 1470 alpha3 = x[9*i+2]; 1471 alpha4 = x[9*i+3]; 1472 alpha5 = x[9*i+4]; 1473 alpha6 = x[9*i+5]; 1474 alpha7 = x[9*i+6]; 1475 alpha8 = x[9*i+7]; 1476 alpha9 = x[9*i+8]; 1477 while (n-->0) { 1478 y[9*(*idx)] += alpha1*(*v); 1479 y[9*(*idx)+1] += alpha2*(*v); 1480 y[9*(*idx)+2] += alpha3*(*v); 1481 y[9*(*idx)+3] += alpha4*(*v); 1482 y[9*(*idx)+4] += alpha5*(*v); 1483 y[9*(*idx)+5] += alpha6*(*v); 1484 y[9*(*idx)+6] += alpha7*(*v); 1485 y[9*(*idx)+7] += alpha8*(*v); 1486 y[9*(*idx)+8] += alpha9*(*v); 1487 idx++; v++; 1488 } 1489 } 1490 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1491 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1492 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1498 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1499 { 1500 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1501 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1502 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1503 PetscErrorCode ierr; 1504 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1505 PetscInt n,i,jrow,j; 1506 1507 PetscFunctionBegin; 1508 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1509 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1510 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1511 idx = a->j; 1512 v = a->a; 1513 ii = a->i; 1514 1515 for (i=0; i<m; i++) { 1516 jrow = ii[i]; 1517 n = ii[i+1] - jrow; 1518 sum1 = 0.0; 1519 sum2 = 0.0; 1520 sum3 = 0.0; 1521 sum4 = 0.0; 1522 sum5 = 0.0; 1523 sum6 = 0.0; 1524 sum7 = 0.0; 1525 sum8 = 0.0; 1526 sum9 = 0.0; 1527 for (j=0; j<n; j++) { 1528 sum1 += v[jrow]*x[9*idx[jrow]]; 1529 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1530 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1531 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1532 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1533 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1534 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1535 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1536 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1537 jrow++; 1538 } 1539 y[9*i] += sum1; 1540 y[9*i+1] += sum2; 1541 y[9*i+2] += sum3; 1542 y[9*i+3] += sum4; 1543 y[9*i+4] += sum5; 1544 y[9*i+5] += sum6; 1545 y[9*i+6] += sum7; 1546 y[9*i+7] += sum8; 1547 y[9*i+8] += sum9; 1548 } 1549 1550 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1551 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1552 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1558 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1559 { 1560 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1561 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1562 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1563 PetscErrorCode ierr; 1564 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1565 1566 PetscFunctionBegin; 1567 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1568 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1569 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1570 for (i=0; i<m; i++) { 1571 idx = a->j + a->i[i] ; 1572 v = a->a + a->i[i] ; 1573 n = a->i[i+1] - a->i[i]; 1574 alpha1 = x[9*i]; 1575 alpha2 = x[9*i+1]; 1576 alpha3 = x[9*i+2]; 1577 alpha4 = x[9*i+3]; 1578 alpha5 = x[9*i+4]; 1579 alpha6 = x[9*i+5]; 1580 alpha7 = x[9*i+6]; 1581 alpha8 = x[9*i+7]; 1582 alpha9 = x[9*i+8]; 1583 while (n-->0) { 1584 y[9*(*idx)] += alpha1*(*v); 1585 y[9*(*idx)+1] += alpha2*(*v); 1586 y[9*(*idx)+2] += alpha3*(*v); 1587 y[9*(*idx)+3] += alpha4*(*v); 1588 y[9*(*idx)+4] += alpha5*(*v); 1589 y[9*(*idx)+5] += alpha6*(*v); 1590 y[9*(*idx)+6] += alpha7*(*v); 1591 y[9*(*idx)+7] += alpha8*(*v); 1592 y[9*(*idx)+8] += alpha9*(*v); 1593 idx++; v++; 1594 } 1595 } 1596 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1597 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1598 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 /*--------------------------------------------------------------------------------------------*/ 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1604 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1605 { 1606 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1607 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1608 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1609 PetscErrorCode ierr; 1610 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 1611 PetscInt n,i,jrow,j; 1612 1613 PetscFunctionBegin; 1614 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1615 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1616 idx = a->j; 1617 v = a->a; 1618 ii = a->i; 1619 1620 for (i=0; i<m; i++) { 1621 jrow = ii[i]; 1622 n = ii[i+1] - jrow; 1623 sum1 = 0.0; 1624 sum2 = 0.0; 1625 sum3 = 0.0; 1626 sum4 = 0.0; 1627 sum5 = 0.0; 1628 sum6 = 0.0; 1629 sum7 = 0.0; 1630 sum8 = 0.0; 1631 sum9 = 0.0; 1632 sum10 = 0.0; 1633 nonzerorow += (n>0); 1634 for (j=0; j<n; j++) { 1635 sum1 += v[jrow]*x[10*idx[jrow]]; 1636 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1637 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1638 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1639 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1640 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1641 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1642 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1643 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1644 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1645 jrow++; 1646 } 1647 y[10*i] = sum1; 1648 y[10*i+1] = sum2; 1649 y[10*i+2] = sum3; 1650 y[10*i+3] = sum4; 1651 y[10*i+4] = sum5; 1652 y[10*i+5] = sum6; 1653 y[10*i+6] = sum7; 1654 y[10*i+7] = sum8; 1655 y[10*i+8] = sum9; 1656 y[10*i+9] = sum10; 1657 } 1658 1659 ierr = PetscLogFlops(20*a->nz - 10*nonzerorow);CHKERRQ(ierr); 1660 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1661 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1662 PetscFunctionReturn(0); 1663 } 1664 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1667 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1668 { 1669 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1670 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1671 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1672 PetscErrorCode ierr; 1673 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1674 PetscInt n,i,jrow,j; 1675 1676 PetscFunctionBegin; 1677 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1678 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1679 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1680 idx = a->j; 1681 v = a->a; 1682 ii = a->i; 1683 1684 for (i=0; i<m; i++) { 1685 jrow = ii[i]; 1686 n = ii[i+1] - jrow; 1687 sum1 = 0.0; 1688 sum2 = 0.0; 1689 sum3 = 0.0; 1690 sum4 = 0.0; 1691 sum5 = 0.0; 1692 sum6 = 0.0; 1693 sum7 = 0.0; 1694 sum8 = 0.0; 1695 sum9 = 0.0; 1696 sum10 = 0.0; 1697 for (j=0; j<n; j++) { 1698 sum1 += v[jrow]*x[10*idx[jrow]]; 1699 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1700 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1701 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1702 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1703 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1704 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1705 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1706 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1707 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1708 jrow++; 1709 } 1710 y[10*i] += sum1; 1711 y[10*i+1] += sum2; 1712 y[10*i+2] += sum3; 1713 y[10*i+3] += sum4; 1714 y[10*i+4] += sum5; 1715 y[10*i+5] += sum6; 1716 y[10*i+6] += sum7; 1717 y[10*i+7] += sum8; 1718 y[10*i+8] += sum9; 1719 y[10*i+9] += sum10; 1720 } 1721 1722 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1723 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1724 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1725 PetscFunctionReturn(0); 1726 } 1727 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1730 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1731 { 1732 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1733 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1734 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1735 PetscErrorCode ierr; 1736 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1737 1738 PetscFunctionBegin; 1739 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1740 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1741 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1742 1743 for (i=0; i<m; i++) { 1744 idx = a->j + a->i[i] ; 1745 v = a->a + a->i[i] ; 1746 n = a->i[i+1] - a->i[i]; 1747 alpha1 = x[10*i]; 1748 alpha2 = x[10*i+1]; 1749 alpha3 = x[10*i+2]; 1750 alpha4 = x[10*i+3]; 1751 alpha5 = x[10*i+4]; 1752 alpha6 = x[10*i+5]; 1753 alpha7 = x[10*i+6]; 1754 alpha8 = x[10*i+7]; 1755 alpha9 = x[10*i+8]; 1756 alpha10 = x[10*i+9]; 1757 while (n-->0) { 1758 y[10*(*idx)] += alpha1*(*v); 1759 y[10*(*idx)+1] += alpha2*(*v); 1760 y[10*(*idx)+2] += alpha3*(*v); 1761 y[10*(*idx)+3] += alpha4*(*v); 1762 y[10*(*idx)+4] += alpha5*(*v); 1763 y[10*(*idx)+5] += alpha6*(*v); 1764 y[10*(*idx)+6] += alpha7*(*v); 1765 y[10*(*idx)+7] += alpha8*(*v); 1766 y[10*(*idx)+8] += alpha9*(*v); 1767 y[10*(*idx)+9] += alpha10*(*v); 1768 idx++; v++; 1769 } 1770 } 1771 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1772 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1773 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1779 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1780 { 1781 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1782 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1783 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1784 PetscErrorCode ierr; 1785 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1786 1787 PetscFunctionBegin; 1788 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1789 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1790 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1791 for (i=0; i<m; i++) { 1792 idx = a->j + a->i[i] ; 1793 v = a->a + a->i[i] ; 1794 n = a->i[i+1] - a->i[i]; 1795 alpha1 = x[10*i]; 1796 alpha2 = x[10*i+1]; 1797 alpha3 = x[10*i+2]; 1798 alpha4 = x[10*i+3]; 1799 alpha5 = x[10*i+4]; 1800 alpha6 = x[10*i+5]; 1801 alpha7 = x[10*i+6]; 1802 alpha8 = x[10*i+7]; 1803 alpha9 = x[10*i+8]; 1804 alpha10 = x[10*i+9]; 1805 while (n-->0) { 1806 y[10*(*idx)] += alpha1*(*v); 1807 y[10*(*idx)+1] += alpha2*(*v); 1808 y[10*(*idx)+2] += alpha3*(*v); 1809 y[10*(*idx)+3] += alpha4*(*v); 1810 y[10*(*idx)+4] += alpha5*(*v); 1811 y[10*(*idx)+5] += alpha6*(*v); 1812 y[10*(*idx)+6] += alpha7*(*v); 1813 y[10*(*idx)+7] += alpha8*(*v); 1814 y[10*(*idx)+8] += alpha9*(*v); 1815 y[10*(*idx)+9] += alpha10*(*v); 1816 idx++; v++; 1817 } 1818 } 1819 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1820 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1821 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 1826 /*--------------------------------------------------------------------------------------------*/ 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1829 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1830 { 1831 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1832 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1833 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1834 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1835 PetscErrorCode ierr; 1836 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 1837 PetscInt n,i,jrow,j; 1838 1839 PetscFunctionBegin; 1840 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1841 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1842 idx = a->j; 1843 v = a->a; 1844 ii = a->i; 1845 1846 for (i=0; i<m; i++) { 1847 jrow = ii[i]; 1848 n = ii[i+1] - jrow; 1849 sum1 = 0.0; 1850 sum2 = 0.0; 1851 sum3 = 0.0; 1852 sum4 = 0.0; 1853 sum5 = 0.0; 1854 sum6 = 0.0; 1855 sum7 = 0.0; 1856 sum8 = 0.0; 1857 sum9 = 0.0; 1858 sum10 = 0.0; 1859 sum11 = 0.0; 1860 sum12 = 0.0; 1861 sum13 = 0.0; 1862 sum14 = 0.0; 1863 sum15 = 0.0; 1864 sum16 = 0.0; 1865 nonzerorow += (n>0); 1866 for (j=0; j<n; j++) { 1867 sum1 += v[jrow]*x[16*idx[jrow]]; 1868 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1869 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1870 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1871 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1872 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1873 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1874 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1875 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1876 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1877 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1878 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1879 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1880 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1881 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1882 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1883 jrow++; 1884 } 1885 y[16*i] = sum1; 1886 y[16*i+1] = sum2; 1887 y[16*i+2] = sum3; 1888 y[16*i+3] = sum4; 1889 y[16*i+4] = sum5; 1890 y[16*i+5] = sum6; 1891 y[16*i+6] = sum7; 1892 y[16*i+7] = sum8; 1893 y[16*i+8] = sum9; 1894 y[16*i+9] = sum10; 1895 y[16*i+10] = sum11; 1896 y[16*i+11] = sum12; 1897 y[16*i+12] = sum13; 1898 y[16*i+13] = sum14; 1899 y[16*i+14] = sum15; 1900 y[16*i+15] = sum16; 1901 } 1902 1903 ierr = PetscLogFlops(32*a->nz - 16*nonzerorow);CHKERRQ(ierr); 1904 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1905 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 #undef __FUNCT__ 1910 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1911 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1912 { 1913 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1914 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1915 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1916 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1917 PetscErrorCode ierr; 1918 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1919 1920 PetscFunctionBegin; 1921 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1922 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1923 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1924 1925 for (i=0; i<m; i++) { 1926 idx = a->j + a->i[i] ; 1927 v = a->a + a->i[i] ; 1928 n = a->i[i+1] - a->i[i]; 1929 alpha1 = x[16*i]; 1930 alpha2 = x[16*i+1]; 1931 alpha3 = x[16*i+2]; 1932 alpha4 = x[16*i+3]; 1933 alpha5 = x[16*i+4]; 1934 alpha6 = x[16*i+5]; 1935 alpha7 = x[16*i+6]; 1936 alpha8 = x[16*i+7]; 1937 alpha9 = x[16*i+8]; 1938 alpha10 = x[16*i+9]; 1939 alpha11 = x[16*i+10]; 1940 alpha12 = x[16*i+11]; 1941 alpha13 = x[16*i+12]; 1942 alpha14 = x[16*i+13]; 1943 alpha15 = x[16*i+14]; 1944 alpha16 = x[16*i+15]; 1945 while (n-->0) { 1946 y[16*(*idx)] += alpha1*(*v); 1947 y[16*(*idx)+1] += alpha2*(*v); 1948 y[16*(*idx)+2] += alpha3*(*v); 1949 y[16*(*idx)+3] += alpha4*(*v); 1950 y[16*(*idx)+4] += alpha5*(*v); 1951 y[16*(*idx)+5] += alpha6*(*v); 1952 y[16*(*idx)+6] += alpha7*(*v); 1953 y[16*(*idx)+7] += alpha8*(*v); 1954 y[16*(*idx)+8] += alpha9*(*v); 1955 y[16*(*idx)+9] += alpha10*(*v); 1956 y[16*(*idx)+10] += alpha11*(*v); 1957 y[16*(*idx)+11] += alpha12*(*v); 1958 y[16*(*idx)+12] += alpha13*(*v); 1959 y[16*(*idx)+13] += alpha14*(*v); 1960 y[16*(*idx)+14] += alpha15*(*v); 1961 y[16*(*idx)+15] += alpha16*(*v); 1962 idx++; v++; 1963 } 1964 } 1965 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 1966 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1967 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1968 PetscFunctionReturn(0); 1969 } 1970 1971 #undef __FUNCT__ 1972 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1973 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1974 { 1975 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1976 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1977 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1978 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1979 PetscErrorCode ierr; 1980 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1981 PetscInt n,i,jrow,j; 1982 1983 PetscFunctionBegin; 1984 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1985 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1986 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1987 idx = a->j; 1988 v = a->a; 1989 ii = a->i; 1990 1991 for (i=0; i<m; i++) { 1992 jrow = ii[i]; 1993 n = ii[i+1] - jrow; 1994 sum1 = 0.0; 1995 sum2 = 0.0; 1996 sum3 = 0.0; 1997 sum4 = 0.0; 1998 sum5 = 0.0; 1999 sum6 = 0.0; 2000 sum7 = 0.0; 2001 sum8 = 0.0; 2002 sum9 = 0.0; 2003 sum10 = 0.0; 2004 sum11 = 0.0; 2005 sum12 = 0.0; 2006 sum13 = 0.0; 2007 sum14 = 0.0; 2008 sum15 = 0.0; 2009 sum16 = 0.0; 2010 for (j=0; j<n; j++) { 2011 sum1 += v[jrow]*x[16*idx[jrow]]; 2012 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2013 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2014 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2015 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2016 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2017 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2018 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2019 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2020 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2021 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2022 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2023 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2024 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2025 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2026 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2027 jrow++; 2028 } 2029 y[16*i] += sum1; 2030 y[16*i+1] += sum2; 2031 y[16*i+2] += sum3; 2032 y[16*i+3] += sum4; 2033 y[16*i+4] += sum5; 2034 y[16*i+5] += sum6; 2035 y[16*i+6] += sum7; 2036 y[16*i+7] += sum8; 2037 y[16*i+8] += sum9; 2038 y[16*i+9] += sum10; 2039 y[16*i+10] += sum11; 2040 y[16*i+11] += sum12; 2041 y[16*i+12] += sum13; 2042 y[16*i+13] += sum14; 2043 y[16*i+14] += sum15; 2044 y[16*i+15] += sum16; 2045 } 2046 2047 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2048 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2049 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2050 PetscFunctionReturn(0); 2051 } 2052 2053 #undef __FUNCT__ 2054 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2055 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2056 { 2057 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2058 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2059 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2060 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2061 PetscErrorCode ierr; 2062 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 2063 2064 PetscFunctionBegin; 2065 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2066 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2067 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2068 for (i=0; i<m; i++) { 2069 idx = a->j + a->i[i] ; 2070 v = a->a + a->i[i] ; 2071 n = a->i[i+1] - a->i[i]; 2072 alpha1 = x[16*i]; 2073 alpha2 = x[16*i+1]; 2074 alpha3 = x[16*i+2]; 2075 alpha4 = x[16*i+3]; 2076 alpha5 = x[16*i+4]; 2077 alpha6 = x[16*i+5]; 2078 alpha7 = x[16*i+6]; 2079 alpha8 = x[16*i+7]; 2080 alpha9 = x[16*i+8]; 2081 alpha10 = x[16*i+9]; 2082 alpha11 = x[16*i+10]; 2083 alpha12 = x[16*i+11]; 2084 alpha13 = x[16*i+12]; 2085 alpha14 = x[16*i+13]; 2086 alpha15 = x[16*i+14]; 2087 alpha16 = x[16*i+15]; 2088 while (n-->0) { 2089 y[16*(*idx)] += alpha1*(*v); 2090 y[16*(*idx)+1] += alpha2*(*v); 2091 y[16*(*idx)+2] += alpha3*(*v); 2092 y[16*(*idx)+3] += alpha4*(*v); 2093 y[16*(*idx)+4] += alpha5*(*v); 2094 y[16*(*idx)+5] += alpha6*(*v); 2095 y[16*(*idx)+6] += alpha7*(*v); 2096 y[16*(*idx)+7] += alpha8*(*v); 2097 y[16*(*idx)+8] += alpha9*(*v); 2098 y[16*(*idx)+9] += alpha10*(*v); 2099 y[16*(*idx)+10] += alpha11*(*v); 2100 y[16*(*idx)+11] += alpha12*(*v); 2101 y[16*(*idx)+12] += alpha13*(*v); 2102 y[16*(*idx)+13] += alpha14*(*v); 2103 y[16*(*idx)+14] += alpha15*(*v); 2104 y[16*(*idx)+15] += alpha16*(*v); 2105 idx++; v++; 2106 } 2107 } 2108 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2109 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2110 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 /*--------------------------------------------------------------------------------------------*/ 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2117 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2118 { 2119 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2120 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2121 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2122 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2123 PetscErrorCode ierr; 2124 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 2125 PetscInt n,i,jrow,j; 2126 2127 PetscFunctionBegin; 2128 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2129 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2130 idx = a->j; 2131 v = a->a; 2132 ii = a->i; 2133 2134 for (i=0; i<m; i++) { 2135 jrow = ii[i]; 2136 n = ii[i+1] - jrow; 2137 sum1 = 0.0; 2138 sum2 = 0.0; 2139 sum3 = 0.0; 2140 sum4 = 0.0; 2141 sum5 = 0.0; 2142 sum6 = 0.0; 2143 sum7 = 0.0; 2144 sum8 = 0.0; 2145 sum9 = 0.0; 2146 sum10 = 0.0; 2147 sum11 = 0.0; 2148 sum12 = 0.0; 2149 sum13 = 0.0; 2150 sum14 = 0.0; 2151 sum15 = 0.0; 2152 sum16 = 0.0; 2153 sum17 = 0.0; 2154 sum18 = 0.0; 2155 nonzerorow += (n>0); 2156 for (j=0; j<n; j++) { 2157 sum1 += v[jrow]*x[18*idx[jrow]]; 2158 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2159 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2160 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2161 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2162 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2163 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2164 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2165 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2166 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2167 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2168 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2169 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2170 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2171 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2172 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2173 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2174 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2175 jrow++; 2176 } 2177 y[18*i] = sum1; 2178 y[18*i+1] = sum2; 2179 y[18*i+2] = sum3; 2180 y[18*i+3] = sum4; 2181 y[18*i+4] = sum5; 2182 y[18*i+5] = sum6; 2183 y[18*i+6] = sum7; 2184 y[18*i+7] = sum8; 2185 y[18*i+8] = sum9; 2186 y[18*i+9] = sum10; 2187 y[18*i+10] = sum11; 2188 y[18*i+11] = sum12; 2189 y[18*i+12] = sum13; 2190 y[18*i+13] = sum14; 2191 y[18*i+14] = sum15; 2192 y[18*i+15] = sum16; 2193 y[18*i+16] = sum17; 2194 y[18*i+17] = sum18; 2195 } 2196 2197 ierr = PetscLogFlops(36*a->nz - 18*nonzerorow);CHKERRQ(ierr); 2198 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2199 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2200 PetscFunctionReturn(0); 2201 } 2202 2203 #undef __FUNCT__ 2204 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2205 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2206 { 2207 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2208 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2209 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 2210 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2211 PetscErrorCode ierr; 2212 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 2213 2214 PetscFunctionBegin; 2215 ierr = VecSet(yy,zero);CHKERRQ(ierr); 2216 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2217 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2218 2219 for (i=0; i<m; i++) { 2220 idx = a->j + a->i[i] ; 2221 v = a->a + a->i[i] ; 2222 n = a->i[i+1] - a->i[i]; 2223 alpha1 = x[18*i]; 2224 alpha2 = x[18*i+1]; 2225 alpha3 = x[18*i+2]; 2226 alpha4 = x[18*i+3]; 2227 alpha5 = x[18*i+4]; 2228 alpha6 = x[18*i+5]; 2229 alpha7 = x[18*i+6]; 2230 alpha8 = x[18*i+7]; 2231 alpha9 = x[18*i+8]; 2232 alpha10 = x[18*i+9]; 2233 alpha11 = x[18*i+10]; 2234 alpha12 = x[18*i+11]; 2235 alpha13 = x[18*i+12]; 2236 alpha14 = x[18*i+13]; 2237 alpha15 = x[18*i+14]; 2238 alpha16 = x[18*i+15]; 2239 alpha17 = x[18*i+16]; 2240 alpha18 = x[18*i+17]; 2241 while (n-->0) { 2242 y[18*(*idx)] += alpha1*(*v); 2243 y[18*(*idx)+1] += alpha2*(*v); 2244 y[18*(*idx)+2] += alpha3*(*v); 2245 y[18*(*idx)+3] += alpha4*(*v); 2246 y[18*(*idx)+4] += alpha5*(*v); 2247 y[18*(*idx)+5] += alpha6*(*v); 2248 y[18*(*idx)+6] += alpha7*(*v); 2249 y[18*(*idx)+7] += alpha8*(*v); 2250 y[18*(*idx)+8] += alpha9*(*v); 2251 y[18*(*idx)+9] += alpha10*(*v); 2252 y[18*(*idx)+10] += alpha11*(*v); 2253 y[18*(*idx)+11] += alpha12*(*v); 2254 y[18*(*idx)+12] += alpha13*(*v); 2255 y[18*(*idx)+13] += alpha14*(*v); 2256 y[18*(*idx)+14] += alpha15*(*v); 2257 y[18*(*idx)+15] += alpha16*(*v); 2258 y[18*(*idx)+16] += alpha17*(*v); 2259 y[18*(*idx)+17] += alpha18*(*v); 2260 idx++; v++; 2261 } 2262 } 2263 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2264 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2265 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2271 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2272 { 2273 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2274 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2275 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2276 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2277 PetscErrorCode ierr; 2278 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 2279 PetscInt n,i,jrow,j; 2280 2281 PetscFunctionBegin; 2282 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2283 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2284 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2285 idx = a->j; 2286 v = a->a; 2287 ii = a->i; 2288 2289 for (i=0; i<m; i++) { 2290 jrow = ii[i]; 2291 n = ii[i+1] - jrow; 2292 sum1 = 0.0; 2293 sum2 = 0.0; 2294 sum3 = 0.0; 2295 sum4 = 0.0; 2296 sum5 = 0.0; 2297 sum6 = 0.0; 2298 sum7 = 0.0; 2299 sum8 = 0.0; 2300 sum9 = 0.0; 2301 sum10 = 0.0; 2302 sum11 = 0.0; 2303 sum12 = 0.0; 2304 sum13 = 0.0; 2305 sum14 = 0.0; 2306 sum15 = 0.0; 2307 sum16 = 0.0; 2308 sum17 = 0.0; 2309 sum18 = 0.0; 2310 for (j=0; j<n; j++) { 2311 sum1 += v[jrow]*x[18*idx[jrow]]; 2312 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2313 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2314 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2315 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2316 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2317 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2318 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2319 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2320 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2321 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2322 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2323 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2324 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2325 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2326 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2327 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2328 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2329 jrow++; 2330 } 2331 y[18*i] += sum1; 2332 y[18*i+1] += sum2; 2333 y[18*i+2] += sum3; 2334 y[18*i+3] += sum4; 2335 y[18*i+4] += sum5; 2336 y[18*i+5] += sum6; 2337 y[18*i+6] += sum7; 2338 y[18*i+7] += sum8; 2339 y[18*i+8] += sum9; 2340 y[18*i+9] += sum10; 2341 y[18*i+10] += sum11; 2342 y[18*i+11] += sum12; 2343 y[18*i+12] += sum13; 2344 y[18*i+13] += sum14; 2345 y[18*i+14] += sum15; 2346 y[18*i+15] += sum16; 2347 y[18*i+16] += sum17; 2348 y[18*i+17] += sum18; 2349 } 2350 2351 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2352 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2353 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2354 PetscFunctionReturn(0); 2355 } 2356 2357 #undef __FUNCT__ 2358 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2359 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2360 { 2361 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2362 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2363 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2364 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2365 PetscErrorCode ierr; 2366 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 2367 2368 PetscFunctionBegin; 2369 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2370 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2371 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2372 for (i=0; i<m; i++) { 2373 idx = a->j + a->i[i] ; 2374 v = a->a + a->i[i] ; 2375 n = a->i[i+1] - a->i[i]; 2376 alpha1 = x[18*i]; 2377 alpha2 = x[18*i+1]; 2378 alpha3 = x[18*i+2]; 2379 alpha4 = x[18*i+3]; 2380 alpha5 = x[18*i+4]; 2381 alpha6 = x[18*i+5]; 2382 alpha7 = x[18*i+6]; 2383 alpha8 = x[18*i+7]; 2384 alpha9 = x[18*i+8]; 2385 alpha10 = x[18*i+9]; 2386 alpha11 = x[18*i+10]; 2387 alpha12 = x[18*i+11]; 2388 alpha13 = x[18*i+12]; 2389 alpha14 = x[18*i+13]; 2390 alpha15 = x[18*i+14]; 2391 alpha16 = x[18*i+15]; 2392 alpha17 = x[18*i+16]; 2393 alpha18 = x[18*i+17]; 2394 while (n-->0) { 2395 y[18*(*idx)] += alpha1*(*v); 2396 y[18*(*idx)+1] += alpha2*(*v); 2397 y[18*(*idx)+2] += alpha3*(*v); 2398 y[18*(*idx)+3] += alpha4*(*v); 2399 y[18*(*idx)+4] += alpha5*(*v); 2400 y[18*(*idx)+5] += alpha6*(*v); 2401 y[18*(*idx)+6] += alpha7*(*v); 2402 y[18*(*idx)+7] += alpha8*(*v); 2403 y[18*(*idx)+8] += alpha9*(*v); 2404 y[18*(*idx)+9] += alpha10*(*v); 2405 y[18*(*idx)+10] += alpha11*(*v); 2406 y[18*(*idx)+11] += alpha12*(*v); 2407 y[18*(*idx)+12] += alpha13*(*v); 2408 y[18*(*idx)+13] += alpha14*(*v); 2409 y[18*(*idx)+14] += alpha15*(*v); 2410 y[18*(*idx)+15] += alpha16*(*v); 2411 y[18*(*idx)+16] += alpha17*(*v); 2412 y[18*(*idx)+17] += alpha18*(*v); 2413 idx++; v++; 2414 } 2415 } 2416 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2417 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2418 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2419 PetscFunctionReturn(0); 2420 } 2421 2422 /*===================================================================================*/ 2423 #undef __FUNCT__ 2424 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2425 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2426 { 2427 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2428 PetscErrorCode ierr; 2429 2430 PetscFunctionBegin; 2431 /* start the scatter */ 2432 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2433 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2434 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2435 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2436 PetscFunctionReturn(0); 2437 } 2438 2439 #undef __FUNCT__ 2440 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2441 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2442 { 2443 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2444 PetscErrorCode ierr; 2445 2446 PetscFunctionBegin; 2447 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2448 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2449 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2450 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2456 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2457 { 2458 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2459 PetscErrorCode ierr; 2460 2461 PetscFunctionBegin; 2462 /* start the scatter */ 2463 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2464 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2465 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2466 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2472 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2473 { 2474 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2479 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2480 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2481 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 /* ----------------------------------------------------------------*/ 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2488 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2489 { 2490 /* This routine requires testing -- but it's getting better. */ 2491 PetscErrorCode ierr; 2492 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2493 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2494 Mat P=pp->AIJ; 2495 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2496 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 2497 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2498 PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn; 2499 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 2500 MatScalar *ca; 2501 2502 PetscFunctionBegin; 2503 /* Start timer */ 2504 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2505 2506 /* Get ij structure of P^T */ 2507 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2508 2509 cn = pn*ppdof; 2510 /* Allocate ci array, arrays for fill computation and */ 2511 /* free space for accumulating nonzero column info */ 2512 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2513 ci[0] = 0; 2514 2515 /* Work arrays for rows of P^T*A */ 2516 ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 2517 ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 2518 ptasparserow = ptadenserow + an; 2519 denserow = ptasparserow + an; 2520 sparserow = denserow + cn; 2521 2522 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2523 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2524 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2525 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 2526 current_space = free_space; 2527 2528 /* Determine symbolic info for each row of C: */ 2529 for (i=0;i<pn;i++) { 2530 ptnzi = pti[i+1] - pti[i]; 2531 ptJ = ptj + pti[i]; 2532 for (dof=0;dof<ppdof;dof++) { 2533 ptanzi = 0; 2534 /* Determine symbolic row of PtA: */ 2535 for (j=0;j<ptnzi;j++) { 2536 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2537 arow = ptJ[j]*ppdof + dof; 2538 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2539 anzj = ai[arow+1] - ai[arow]; 2540 ajj = aj + ai[arow]; 2541 for (k=0;k<anzj;k++) { 2542 if (!ptadenserow[ajj[k]]) { 2543 ptadenserow[ajj[k]] = -1; 2544 ptasparserow[ptanzi++] = ajj[k]; 2545 } 2546 } 2547 } 2548 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2549 ptaj = ptasparserow; 2550 cnzi = 0; 2551 for (j=0;j<ptanzi;j++) { 2552 /* Get offset within block of P */ 2553 pshift = *ptaj%ppdof; 2554 /* Get block row of P */ 2555 prow = (*ptaj++)/ppdof; /* integer division */ 2556 /* P has same number of nonzeros per row as the compressed form */ 2557 pnzj = pi[prow+1] - pi[prow]; 2558 pjj = pj + pi[prow]; 2559 for (k=0;k<pnzj;k++) { 2560 /* Locations in C are shifted by the offset within the block */ 2561 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2562 if (!denserow[pjj[k]*ppdof+pshift]) { 2563 denserow[pjj[k]*ppdof+pshift] = -1; 2564 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2565 } 2566 } 2567 } 2568 2569 /* sort sparserow */ 2570 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2571 2572 /* If free space is not available, make more free space */ 2573 /* Double the amount of total space in the list */ 2574 if (current_space->local_remaining<cnzi) { 2575 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 2576 } 2577 2578 /* Copy data into free space, and zero out denserows */ 2579 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2580 current_space->array += cnzi; 2581 current_space->local_used += cnzi; 2582 current_space->local_remaining -= cnzi; 2583 2584 for (j=0;j<ptanzi;j++) { 2585 ptadenserow[ptasparserow[j]] = 0; 2586 } 2587 for (j=0;j<cnzi;j++) { 2588 denserow[sparserow[j]] = 0; 2589 } 2590 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2591 /* For now, we will recompute what is needed. */ 2592 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2593 } 2594 } 2595 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2596 /* Allocate space for cj, initialize cj, and */ 2597 /* destroy list of free space and other temporary array(s) */ 2598 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2599 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2600 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 2601 2602 /* Allocate space for ca */ 2603 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2604 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2605 2606 /* put together the new matrix */ 2607 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2608 2609 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2610 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2611 c = (Mat_SeqAIJ *)((*C)->data); 2612 c->free_a = PETSC_TRUE; 2613 c->free_ij = PETSC_TRUE; 2614 c->nonew = 0; 2615 2616 /* Clean up. */ 2617 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2618 2619 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2620 PetscFunctionReturn(0); 2621 } 2622 2623 #undef __FUNCT__ 2624 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 2625 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2626 { 2627 /* This routine requires testing -- first draft only */ 2628 PetscErrorCode ierr; 2629 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2630 Mat P=pp->AIJ; 2631 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 2632 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 2633 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2634 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 2635 PetscInt *ci=c->i,*cj=c->j,*cjj; 2636 PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof; 2637 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 2638 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 2639 2640 PetscFunctionBegin; 2641 /* Allocate temporary array for storage of one row of A*P */ 2642 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 2643 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 2644 2645 apj = (PetscInt *)(apa + cn); 2646 apjdense = apj + cn; 2647 2648 /* Clear old values in C */ 2649 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2650 2651 for (i=0;i<am;i++) { 2652 /* Form sparse row of A*P */ 2653 anzi = ai[i+1] - ai[i]; 2654 apnzj = 0; 2655 for (j=0;j<anzi;j++) { 2656 /* Get offset within block of P */ 2657 pshift = *aj%ppdof; 2658 /* Get block row of P */ 2659 prow = *aj++/ppdof; /* integer division */ 2660 pnzj = pi[prow+1] - pi[prow]; 2661 pjj = pj + pi[prow]; 2662 paj = pa + pi[prow]; 2663 for (k=0;k<pnzj;k++) { 2664 poffset = pjj[k]*ppdof+pshift; 2665 if (!apjdense[poffset]) { 2666 apjdense[poffset] = -1; 2667 apj[apnzj++] = poffset; 2668 } 2669 apa[poffset] += (*aa)*paj[k]; 2670 } 2671 ierr = PetscLogFlops(2*pnzj);CHKERRQ(ierr); 2672 aa++; 2673 } 2674 2675 /* Sort the j index array for quick sparse axpy. */ 2676 /* Note: a array does not need sorting as it is in dense storage locations. */ 2677 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 2678 2679 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2680 prow = i/ppdof; /* integer division */ 2681 pshift = i%ppdof; 2682 poffset = pi[prow]; 2683 pnzi = pi[prow+1] - poffset; 2684 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2685 pJ = pj+poffset; 2686 pA = pa+poffset; 2687 for (j=0;j<pnzi;j++) { 2688 crow = (*pJ)*ppdof+pshift; 2689 cjj = cj + ci[crow]; 2690 caj = ca + ci[crow]; 2691 pJ++; 2692 /* Perform sparse axpy operation. Note cjj includes apj. */ 2693 for (k=0,nextap=0;nextap<apnzj;k++) { 2694 if (cjj[k]==apj[nextap]) { 2695 caj[k] += (*pA)*apa[apj[nextap++]]; 2696 } 2697 } 2698 ierr = PetscLogFlops(2*apnzj);CHKERRQ(ierr); 2699 pA++; 2700 } 2701 2702 /* Zero the current row info for A*P */ 2703 for (j=0;j<apnzj;j++) { 2704 apa[apj[j]] = 0.; 2705 apjdense[apj[j]] = 0; 2706 } 2707 } 2708 2709 /* Assemble the final matrix and clean up */ 2710 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2711 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2712 ierr = PetscFree(apa);CHKERRQ(ierr); 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 2718 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2719 { 2720 PetscErrorCode ierr; 2721 2722 PetscFunctionBegin; 2723 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 2724 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 2725 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 2726 PetscFunctionReturn(0); 2727 } 2728 2729 #undef __FUNCT__ 2730 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 2731 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 2732 { 2733 PetscFunctionBegin; 2734 SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 2735 PetscFunctionReturn(0); 2736 } 2737 2738 EXTERN_C_BEGIN 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2741 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2742 { 2743 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2744 Mat a = b->AIJ,B; 2745 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 2746 PetscErrorCode ierr; 2747 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2748 PetscInt *cols; 2749 PetscScalar *vals; 2750 2751 PetscFunctionBegin; 2752 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 2753 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 2754 for (i=0; i<m; i++) { 2755 nmax = PetscMax(nmax,aij->ilen[i]); 2756 for (j=0; j<dof; j++) { 2757 ilen[dof*i+j] = aij->ilen[i]; 2758 } 2759 } 2760 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2761 ierr = PetscFree(ilen);CHKERRQ(ierr); 2762 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 2763 ii = 0; 2764 for (i=0; i<m; i++) { 2765 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2766 for (j=0; j<dof; j++) { 2767 for (k=0; k<ncols; k++) { 2768 icols[k] = dof*cols[k]+j; 2769 } 2770 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2771 ii++; 2772 } 2773 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2774 } 2775 ierr = PetscFree(icols);CHKERRQ(ierr); 2776 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2777 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2778 2779 if (reuse == MAT_REUSE_MATRIX) { 2780 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2781 } else { 2782 *newmat = B; 2783 } 2784 PetscFunctionReturn(0); 2785 } 2786 EXTERN_C_END 2787 2788 #include "src/mat/impls/aij/mpi/mpiaij.h" 2789 2790 EXTERN_C_BEGIN 2791 #undef __FUNCT__ 2792 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2793 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2794 { 2795 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2796 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 2797 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 2798 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 2799 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 2800 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2801 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 2802 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 2803 PetscInt rstart,cstart,*garray,ii,k; 2804 PetscErrorCode ierr; 2805 PetscScalar *vals,*ovals; 2806 2807 PetscFunctionBegin; 2808 ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr); 2809 for (i=0; i<A->rmap.n/dof; i++) { 2810 nmax = PetscMax(nmax,AIJ->ilen[i]); 2811 onmax = PetscMax(onmax,OAIJ->ilen[i]); 2812 for (j=0; j<dof; j++) { 2813 dnz[dof*i+j] = AIJ->ilen[i]; 2814 onz[dof*i+j] = OAIJ->ilen[i]; 2815 } 2816 } 2817 ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2818 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 2819 2820 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2821 rstart = dof*maij->A->rmap.rstart; 2822 cstart = dof*maij->A->cmap.rstart; 2823 garray = mpiaij->garray; 2824 2825 ii = rstart; 2826 for (i=0; i<A->rmap.n/dof; i++) { 2827 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2828 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2829 for (j=0; j<dof; j++) { 2830 for (k=0; k<ncols; k++) { 2831 icols[k] = cstart + dof*cols[k]+j; 2832 } 2833 for (k=0; k<oncols; k++) { 2834 oicols[k] = dof*garray[ocols[k]]+j; 2835 } 2836 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2837 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 2838 ii++; 2839 } 2840 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2841 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2842 } 2843 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 2844 2845 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2846 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2847 2848 if (reuse == MAT_REUSE_MATRIX) { 2849 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 2850 ((PetscObject)A)->refct = 1; 2851 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2852 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 2853 } else { 2854 *newmat = B; 2855 } 2856 PetscFunctionReturn(0); 2857 } 2858 EXTERN_C_END 2859 2860 2861 /* ---------------------------------------------------------------------------------- */ 2862 /*MC 2863 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 2864 operations for multicomponent problems. It interpolates each component the same 2865 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 2866 and MATMPIAIJ for distributed matrices. 2867 2868 Operations provided: 2869 + MatMult 2870 . MatMultTranspose 2871 . MatMultAdd 2872 . MatMultTransposeAdd 2873 - MatView 2874 2875 Level: advanced 2876 2877 M*/ 2878 #undef __FUNCT__ 2879 #define __FUNCT__ "MatCreateMAIJ" 2880 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 2881 { 2882 PetscErrorCode ierr; 2883 PetscMPIInt size; 2884 PetscInt n; 2885 Mat_MPIMAIJ *b; 2886 Mat B; 2887 2888 PetscFunctionBegin; 2889 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2890 2891 if (dof == 1) { 2892 *maij = A; 2893 } else { 2894 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2895 ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr); 2896 B->assembled = PETSC_TRUE; 2897 2898 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2899 if (size == 1) { 2900 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 2901 B->ops->destroy = MatDestroy_SeqMAIJ; 2902 B->ops->view = MatView_SeqMAIJ; 2903 b = (Mat_MPIMAIJ*)B->data; 2904 b->dof = dof; 2905 b->AIJ = A; 2906 if (dof == 2) { 2907 B->ops->mult = MatMult_SeqMAIJ_2; 2908 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2909 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2910 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2911 } else if (dof == 3) { 2912 B->ops->mult = MatMult_SeqMAIJ_3; 2913 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2914 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2915 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2916 } else if (dof == 4) { 2917 B->ops->mult = MatMult_SeqMAIJ_4; 2918 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2919 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2920 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2921 } else if (dof == 5) { 2922 B->ops->mult = MatMult_SeqMAIJ_5; 2923 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2924 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2925 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 2926 } else if (dof == 6) { 2927 B->ops->mult = MatMult_SeqMAIJ_6; 2928 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 2929 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 2930 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2931 } else if (dof == 7) { 2932 B->ops->mult = MatMult_SeqMAIJ_7; 2933 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2934 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2935 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 2936 } else if (dof == 8) { 2937 B->ops->mult = MatMult_SeqMAIJ_8; 2938 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 2939 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 2940 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 2941 } else if (dof == 9) { 2942 B->ops->mult = MatMult_SeqMAIJ_9; 2943 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 2944 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 2945 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2946 } else if (dof == 10) { 2947 B->ops->mult = MatMult_SeqMAIJ_10; 2948 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2949 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2950 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 2951 } else if (dof == 16) { 2952 B->ops->mult = MatMult_SeqMAIJ_16; 2953 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 2954 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 2955 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 2956 } else if (dof == 18) { 2957 B->ops->mult = MatMult_SeqMAIJ_18; 2958 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 2959 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 2960 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 2961 } else { 2962 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 2963 } 2964 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 2965 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2966 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2967 } else { 2968 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2969 IS from,to; 2970 Vec gvec; 2971 PetscInt *garray,i; 2972 2973 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2974 B->ops->destroy = MatDestroy_MPIMAIJ; 2975 B->ops->view = MatView_MPIMAIJ; 2976 b = (Mat_MPIMAIJ*)B->data; 2977 b->dof = dof; 2978 b->A = A; 2979 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 2980 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 2981 2982 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2983 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 2984 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2985 2986 /* create two temporary Index sets for build scatter gather */ 2987 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2988 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2989 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr); 2990 ierr = PetscFree(garray);CHKERRQ(ierr); 2991 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2992 2993 /* create temporary global vector to generate scatter context */ 2994 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap.n,dof*A->cmap.N,PETSC_NULL,&gvec);CHKERRQ(ierr); 2995 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2996 2997 /* generate the scatter context */ 2998 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2999 3000 ierr = ISDestroy(from);CHKERRQ(ierr); 3001 ierr = ISDestroy(to);CHKERRQ(ierr); 3002 ierr = VecDestroy(gvec);CHKERRQ(ierr); 3003 3004 B->ops->mult = MatMult_MPIMAIJ_dof; 3005 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3006 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3007 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3008 B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 3009 B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 3010 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3011 } 3012 *maij = B; 3013 ierr = MatView_Private(B);CHKERRQ(ierr); 3014 } 3015 PetscFunctionReturn(0); 3016 } 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029