Lines Matching refs:v

241   const MatScalar   *v;  in MatMult_SeqBAIJ_1()  local
262 v = a->a + ii[0]; in MatMult_SeqBAIJ_1()
266 PetscPrefetchBlock(v + 1 * n, 1 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMult_SeqBAIJ_1()
268 PetscSparseDensePlusDot(sum, x, v, idx, n); in MatMult_SeqBAIJ_1()
287 const MatScalar *v; in MatMult_SeqBAIJ_2() local
296 v = a->a; in MatMult_SeqBAIJ_2()
314 PetscPrefetchBlock(v + 4 * n, 4 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMult_SeqBAIJ_2()
319 sum1 += v[0] * x1 + v[2] * x2; in MatMult_SeqBAIJ_2()
320 sum2 += v[1] * x1 + v[3] * x2; in MatMult_SeqBAIJ_2()
321 v += 4; in MatMult_SeqBAIJ_2()
339 const MatScalar *v; in MatMult_SeqBAIJ_3() local
344 #pragma disjoint(*v, *z, *xb) in MatMult_SeqBAIJ_3()
352 v = a->a; in MatMult_SeqBAIJ_3()
371 PetscPrefetchBlock(v + 9 * n, 9 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMult_SeqBAIJ_3()
378 sum1 += v[0] * x1 + v[3] * x2 + v[6] * x3; in MatMult_SeqBAIJ_3()
379 sum2 += v[1] * x1 + v[4] * x2 + v[7] * x3; in MatMult_SeqBAIJ_3()
380 sum3 += v[2] * x1 + v[5] * x2 + v[8] * x3; in MatMult_SeqBAIJ_3()
381 v += 9; in MatMult_SeqBAIJ_3()
400 const MatScalar *v; in MatMult_SeqBAIJ_4() local
409 v = a->a; in MatMult_SeqBAIJ_4()
430 … PetscPrefetchBlock(v + 16 * n, 16 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMult_SeqBAIJ_4()
437 sum1 += v[0] * x1 + v[4] * x2 + v[8] * x3 + v[12] * x4; in MatMult_SeqBAIJ_4()
438 sum2 += v[1] * x1 + v[5] * x2 + v[9] * x3 + v[13] * x4; in MatMult_SeqBAIJ_4()
439 sum3 += v[2] * x1 + v[6] * x2 + v[10] * x3 + v[14] * x4; in MatMult_SeqBAIJ_4()
440 sum4 += v[3] * x1 + v[7] * x2 + v[11] * x3 + v[15] * x4; in MatMult_SeqBAIJ_4()
441 v += 16; in MatMult_SeqBAIJ_4()
461 const MatScalar *v; in MatMult_SeqBAIJ_5() local
471 v = a->a; in MatMult_SeqBAIJ_5()
492 … PetscPrefetchBlock(v + 25 * n, 25 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMult_SeqBAIJ_5()
500 sum1 += v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5; in MatMult_SeqBAIJ_5()
501 sum2 += v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5; in MatMult_SeqBAIJ_5()
502 sum3 += v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5; in MatMult_SeqBAIJ_5()
503 sum4 += v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5; in MatMult_SeqBAIJ_5()
504 sum5 += v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5; in MatMult_SeqBAIJ_5()
505 v += 25; in MatMult_SeqBAIJ_5()
527 const MatScalar *v; in MatMult_SeqBAIJ_6() local
536 v = a->a; in MatMult_SeqBAIJ_6()
559 … PetscPrefetchBlock(v + 36 * n, 36 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMult_SeqBAIJ_6()
568 sum1 += v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6; in MatMult_SeqBAIJ_6()
569 sum2 += v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6; in MatMult_SeqBAIJ_6()
570 sum3 += v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6; in MatMult_SeqBAIJ_6()
571 sum4 += v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6; in MatMult_SeqBAIJ_6()
572 sum5 += v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6; in MatMult_SeqBAIJ_6()
573 sum6 += v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6; in MatMult_SeqBAIJ_6()
574 v += 36; in MatMult_SeqBAIJ_6()
598 const MatScalar *v; in MatMult_SeqBAIJ_7() local
607 v = a->a; in MatMult_SeqBAIJ_7()
631 … PetscPrefetchBlock(v + 49 * n, 49 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMult_SeqBAIJ_7()
641 … sum1 += v[0] * x1 + v[7] * x2 + v[14] * x3 + v[21] * x4 + v[28] * x5 + v[35] * x6 + v[42] * x7; in MatMult_SeqBAIJ_7()
642 … sum2 += v[1] * x1 + v[8] * x2 + v[15] * x3 + v[22] * x4 + v[29] * x5 + v[36] * x6 + v[43] * x7; in MatMult_SeqBAIJ_7()
643 … sum3 += v[2] * x1 + v[9] * x2 + v[16] * x3 + v[23] * x4 + v[30] * x5 + v[37] * x6 + v[44] * x7; in MatMult_SeqBAIJ_7()
644 … sum4 += v[3] * x1 + v[10] * x2 + v[17] * x3 + v[24] * x4 + v[31] * x5 + v[38] * x6 + v[45] * x7; in MatMult_SeqBAIJ_7()
645 … sum5 += v[4] * x1 + v[11] * x2 + v[18] * x3 + v[25] * x4 + v[32] * x5 + v[39] * x6 + v[46] * x7; in MatMult_SeqBAIJ_7()
646 … sum6 += v[5] * x1 + v[12] * x2 + v[19] * x3 + v[26] * x4 + v[33] * x5 + v[40] * x6 + v[47] * x7; in MatMult_SeqBAIJ_7()
647 … sum7 += v[6] * x1 + v[13] * x2 + v[20] * x3 + v[27] * x4 + v[34] * x5 + v[41] * x6 + v[48] * x7; in MatMult_SeqBAIJ_7()
648 v += 49; in MatMult_SeqBAIJ_7()
673 const MatScalar *v; in MatMult_SeqBAIJ_9_AVX2() local
689 v = a->a; in MatMult_SeqBAIJ_9_AVX2()
725 a0 = _mm256_loadu_pd(&v[j * 81]); in MatMult_SeqBAIJ_9_AVX2()
727 a1 = _mm256_loadu_pd(&v[j * 81 + 4]); in MatMult_SeqBAIJ_9_AVX2()
729 a2 = _mm256_loadu_pd(&v[j * 81 + 8]); in MatMult_SeqBAIJ_9_AVX2()
734 a0 = _mm256_loadu_pd(&v[j * 81 + 9]); in MatMult_SeqBAIJ_9_AVX2()
736 a1 = _mm256_loadu_pd(&v[j * 81 + 13]); in MatMult_SeqBAIJ_9_AVX2()
738 a2 = _mm256_loadu_pd(&v[j * 81 + 17]); in MatMult_SeqBAIJ_9_AVX2()
743 a3 = _mm256_loadu_pd(&v[j * 81 + 18]); in MatMult_SeqBAIJ_9_AVX2()
745 a4 = _mm256_loadu_pd(&v[j * 81 + 22]); in MatMult_SeqBAIJ_9_AVX2()
747 a5 = _mm256_loadu_pd(&v[j * 81 + 26]); in MatMult_SeqBAIJ_9_AVX2()
752 a0 = _mm256_loadu_pd(&v[j * 81 + 27]); in MatMult_SeqBAIJ_9_AVX2()
754 a1 = _mm256_loadu_pd(&v[j * 81 + 31]); in MatMult_SeqBAIJ_9_AVX2()
756 a2 = _mm256_loadu_pd(&v[j * 81 + 35]); in MatMult_SeqBAIJ_9_AVX2()
761 a3 = _mm256_loadu_pd(&v[j * 81 + 36]); in MatMult_SeqBAIJ_9_AVX2()
763 a4 = _mm256_loadu_pd(&v[j * 81 + 40]); in MatMult_SeqBAIJ_9_AVX2()
765 a5 = _mm256_loadu_pd(&v[j * 81 + 44]); in MatMult_SeqBAIJ_9_AVX2()
770 a0 = _mm256_loadu_pd(&v[j * 81 + 45]); in MatMult_SeqBAIJ_9_AVX2()
772 a1 = _mm256_loadu_pd(&v[j * 81 + 49]); in MatMult_SeqBAIJ_9_AVX2()
774 a2 = _mm256_loadu_pd(&v[j * 81 + 53]); in MatMult_SeqBAIJ_9_AVX2()
779 a0 = _mm256_loadu_pd(&v[j * 81 + 54]); in MatMult_SeqBAIJ_9_AVX2()
781 a1 = _mm256_loadu_pd(&v[j * 81 + 58]); in MatMult_SeqBAIJ_9_AVX2()
783 a2 = _mm256_loadu_pd(&v[j * 81 + 62]); in MatMult_SeqBAIJ_9_AVX2()
788 a3 = _mm256_loadu_pd(&v[j * 81 + 63]); in MatMult_SeqBAIJ_9_AVX2()
790 a4 = _mm256_loadu_pd(&v[j * 81 + 67]); in MatMult_SeqBAIJ_9_AVX2()
792 a5 = _mm256_loadu_pd(&v[j * 81 + 71]); in MatMult_SeqBAIJ_9_AVX2()
797 a0 = _mm256_loadu_pd(&v[j * 81 + 72]); in MatMult_SeqBAIJ_9_AVX2()
799 a1 = _mm256_loadu_pd(&v[j * 81 + 76]); in MatMult_SeqBAIJ_9_AVX2()
801 a2 = _mm256_maskload_pd(&v[j * 81 + 80], mask1); in MatMult_SeqBAIJ_9_AVX2()
809 v += n * bs2; in MatMult_SeqBAIJ_9_AVX2()
825 const MatScalar *v; in MatMult_SeqBAIJ_11() local
834 v = a->a; in MatMult_SeqBAIJ_11()
866 sum1 += v[0] * xv; in MatMult_SeqBAIJ_11()
867 sum2 += v[1] * xv; in MatMult_SeqBAIJ_11()
868 sum3 += v[2] * xv; in MatMult_SeqBAIJ_11()
869 sum4 += v[3] * xv; in MatMult_SeqBAIJ_11()
870 sum5 += v[4] * xv; in MatMult_SeqBAIJ_11()
871 sum6 += v[5] * xv; in MatMult_SeqBAIJ_11()
872 sum7 += v[6] * xv; in MatMult_SeqBAIJ_11()
873 sum8 += v[7] * xv; in MatMult_SeqBAIJ_11()
874 sum9 += v[8] * xv; in MatMult_SeqBAIJ_11()
875 sum10 += v[9] * xv; in MatMult_SeqBAIJ_11()
876 sum11 += v[10] * xv; in MatMult_SeqBAIJ_11()
877 v += 11; in MatMult_SeqBAIJ_11()
909 const MatScalar *v; in MatMult_SeqBAIJ_12_ver1() local
918 v = a->a; in MatMult_SeqBAIJ_12_ver1()
951 sum1 += v[0] * xv; in MatMult_SeqBAIJ_12_ver1()
952 sum2 += v[1] * xv; in MatMult_SeqBAIJ_12_ver1()
953 sum3 += v[2] * xv; in MatMult_SeqBAIJ_12_ver1()
954 sum4 += v[3] * xv; in MatMult_SeqBAIJ_12_ver1()
955 sum5 += v[4] * xv; in MatMult_SeqBAIJ_12_ver1()
956 sum6 += v[5] * xv; in MatMult_SeqBAIJ_12_ver1()
957 sum7 += v[6] * xv; in MatMult_SeqBAIJ_12_ver1()
958 sum8 += v[7] * xv; in MatMult_SeqBAIJ_12_ver1()
959 sum9 += v[8] * xv; in MatMult_SeqBAIJ_12_ver1()
960 sum10 += v[9] * xv; in MatMult_SeqBAIJ_12_ver1()
961 sum11 += v[10] * xv; in MatMult_SeqBAIJ_12_ver1()
962 sum12 += v[11] * xv; in MatMult_SeqBAIJ_12_ver1()
963 v += 12; in MatMult_SeqBAIJ_12_ver1()
993 const MatScalar *v; in MatMultAdd_SeqBAIJ_12_ver1() local
1002 v = a->a; in MatMultAdd_SeqBAIJ_12_ver1()
1040 sum1 += v[0] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1041 sum2 += v[1] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1042 sum3 += v[2] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1043 sum4 += v[3] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1044 sum5 += v[4] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1045 sum6 += v[5] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1046 sum7 += v[6] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1047 sum8 += v[7] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1048 sum9 += v[8] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1049 sum10 += v[9] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1050 sum11 += v[10] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1051 sum12 += v[11] * xv; in MatMultAdd_SeqBAIJ_12_ver1()
1052 v += 12; in MatMultAdd_SeqBAIJ_12_ver1()
1086 const MatScalar *v; in MatMult_SeqBAIJ_12_ver2() local
1095 v = a->a; in MatMult_SeqBAIJ_12_ver2()
1119 sum1 += v[0] * x1 + v[12] * x2 + v[24] * x3 + v[36] * x4; in MatMult_SeqBAIJ_12_ver2()
1120 sum2 += v[1] * x1 + v[13] * x2 + v[25] * x3 + v[37] * x4; in MatMult_SeqBAIJ_12_ver2()
1121 sum3 += v[2] * x1 + v[14] * x2 + v[26] * x3 + v[38] * x4; in MatMult_SeqBAIJ_12_ver2()
1122 sum4 += v[3] * x1 + v[15] * x2 + v[27] * x3 + v[39] * x4; in MatMult_SeqBAIJ_12_ver2()
1123 sum5 += v[4] * x1 + v[16] * x2 + v[28] * x3 + v[40] * x4; in MatMult_SeqBAIJ_12_ver2()
1124 sum6 += v[5] * x1 + v[17] * x2 + v[29] * x3 + v[41] * x4; in MatMult_SeqBAIJ_12_ver2()
1125 sum7 += v[6] * x1 + v[18] * x2 + v[30] * x3 + v[42] * x4; in MatMult_SeqBAIJ_12_ver2()
1126 sum8 += v[7] * x1 + v[19] * x2 + v[31] * x3 + v[43] * x4; in MatMult_SeqBAIJ_12_ver2()
1127 sum9 += v[8] * x1 + v[20] * x2 + v[32] * x3 + v[44] * x4; in MatMult_SeqBAIJ_12_ver2()
1128 sum10 += v[9] * x1 + v[21] * x2 + v[33] * x3 + v[45] * x4; in MatMult_SeqBAIJ_12_ver2()
1129 sum11 += v[10] * x1 + v[22] * x2 + v[34] * x3 + v[46] * x4; in MatMult_SeqBAIJ_12_ver2()
1130 sum12 += v[11] * x1 + v[23] * x2 + v[35] * x3 + v[47] * x4; in MatMult_SeqBAIJ_12_ver2()
1131 v += 48; in MatMult_SeqBAIJ_12_ver2()
1138 sum1 += v[0] * x1 + v[12] * x2 + v[24] * x3 + v[36] * x4; in MatMult_SeqBAIJ_12_ver2()
1139 sum2 += v[1] * x1 + v[13] * x2 + v[25] * x3 + v[37] * x4; in MatMult_SeqBAIJ_12_ver2()
1140 sum3 += v[2] * x1 + v[14] * x2 + v[26] * x3 + v[38] * x4; in MatMult_SeqBAIJ_12_ver2()
1141 sum4 += v[3] * x1 + v[15] * x2 + v[27] * x3 + v[39] * x4; in MatMult_SeqBAIJ_12_ver2()
1142 sum5 += v[4] * x1 + v[16] * x2 + v[28] * x3 + v[40] * x4; in MatMult_SeqBAIJ_12_ver2()
1143 sum6 += v[5] * x1 + v[17] * x2 + v[29] * x3 + v[41] * x4; in MatMult_SeqBAIJ_12_ver2()
1144 sum7 += v[6] * x1 + v[18] * x2 + v[30] * x3 + v[42] * x4; in MatMult_SeqBAIJ_12_ver2()
1145 sum8 += v[7] * x1 + v[19] * x2 + v[31] * x3 + v[43] * x4; in MatMult_SeqBAIJ_12_ver2()
1146 sum9 += v[8] * x1 + v[20] * x2 + v[32] * x3 + v[44] * x4; in MatMult_SeqBAIJ_12_ver2()
1147 sum10 += v[9] * x1 + v[21] * x2 + v[33] * x3 + v[45] * x4; in MatMult_SeqBAIJ_12_ver2()
1148 sum11 += v[10] * x1 + v[22] * x2 + v[34] * x3 + v[46] * x4; in MatMult_SeqBAIJ_12_ver2()
1149 sum12 += v[11] * x1 + v[23] * x2 + v[35] * x3 + v[47] * x4; in MatMult_SeqBAIJ_12_ver2()
1150 v += 48; in MatMult_SeqBAIJ_12_ver2()
1156 sum1 += v[0] * x1 + v[12] * x2 + v[24] * x3 + v[36] * x4; in MatMult_SeqBAIJ_12_ver2()
1157 sum2 += v[1] * x1 + v[13] * x2 + v[25] * x3 + v[37] * x4; in MatMult_SeqBAIJ_12_ver2()
1158 sum3 += v[2] * x1 + v[14] * x2 + v[26] * x3 + v[38] * x4; in MatMult_SeqBAIJ_12_ver2()
1159 sum4 += v[3] * x1 + v[15] * x2 + v[27] * x3 + v[39] * x4; in MatMult_SeqBAIJ_12_ver2()
1160 sum5 += v[4] * x1 + v[16] * x2 + v[28] * x3 + v[40] * x4; in MatMult_SeqBAIJ_12_ver2()
1161 sum6 += v[5] * x1 + v[17] * x2 + v[29] * x3 + v[41] * x4; in MatMult_SeqBAIJ_12_ver2()
1162 sum7 += v[6] * x1 + v[18] * x2 + v[30] * x3 + v[42] * x4; in MatMult_SeqBAIJ_12_ver2()
1163 sum8 += v[7] * x1 + v[19] * x2 + v[31] * x3 + v[43] * x4; in MatMult_SeqBAIJ_12_ver2()
1164 sum9 += v[8] * x1 + v[20] * x2 + v[32] * x3 + v[44] * x4; in MatMult_SeqBAIJ_12_ver2()
1165 sum10 += v[9] * x1 + v[21] * x2 + v[33] * x3 + v[45] * x4; in MatMult_SeqBAIJ_12_ver2()
1166 sum11 += v[10] * x1 + v[22] * x2 + v[34] * x3 + v[46] * x4; in MatMult_SeqBAIJ_12_ver2()
1167 sum12 += v[11] * x1 + v[23] * x2 + v[35] * x3 + v[47] * x4; in MatMult_SeqBAIJ_12_ver2()
1168 v += 48; in MatMult_SeqBAIJ_12_ver2()
1198 const MatScalar *v; in MatMultAdd_SeqBAIJ_12_ver2() local
1207 v = a->a; in MatMultAdd_SeqBAIJ_12_ver2()
1247 sum1 += v[0] * x1 + v[12] * x2 + v[24] * x3 + v[36] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1248 sum2 += v[1] * x1 + v[13] * x2 + v[25] * x3 + v[37] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1249 sum3 += v[2] * x1 + v[14] * x2 + v[26] * x3 + v[38] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1250 sum4 += v[3] * x1 + v[15] * x2 + v[27] * x3 + v[39] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1251 sum5 += v[4] * x1 + v[16] * x2 + v[28] * x3 + v[40] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1252 sum6 += v[5] * x1 + v[17] * x2 + v[29] * x3 + v[41] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1253 sum7 += v[6] * x1 + v[18] * x2 + v[30] * x3 + v[42] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1254 sum8 += v[7] * x1 + v[19] * x2 + v[31] * x3 + v[43] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1255 sum9 += v[8] * x1 + v[20] * x2 + v[32] * x3 + v[44] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1256 sum10 += v[9] * x1 + v[21] * x2 + v[33] * x3 + v[45] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1257 sum11 += v[10] * x1 + v[22] * x2 + v[34] * x3 + v[46] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1258 sum12 += v[11] * x1 + v[23] * x2 + v[35] * x3 + v[47] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1259 v += 48; in MatMultAdd_SeqBAIJ_12_ver2()
1266 sum1 += v[0] * x1 + v[12] * x2 + v[24] * x3 + v[36] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1267 sum2 += v[1] * x1 + v[13] * x2 + v[25] * x3 + v[37] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1268 sum3 += v[2] * x1 + v[14] * x2 + v[26] * x3 + v[38] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1269 sum4 += v[3] * x1 + v[15] * x2 + v[27] * x3 + v[39] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1270 sum5 += v[4] * x1 + v[16] * x2 + v[28] * x3 + v[40] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1271 sum6 += v[5] * x1 + v[17] * x2 + v[29] * x3 + v[41] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1272 sum7 += v[6] * x1 + v[18] * x2 + v[30] * x3 + v[42] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1273 sum8 += v[7] * x1 + v[19] * x2 + v[31] * x3 + v[43] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1274 sum9 += v[8] * x1 + v[20] * x2 + v[32] * x3 + v[44] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1275 sum10 += v[9] * x1 + v[21] * x2 + v[33] * x3 + v[45] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1276 sum11 += v[10] * x1 + v[22] * x2 + v[34] * x3 + v[46] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1277 sum12 += v[11] * x1 + v[23] * x2 + v[35] * x3 + v[47] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1278 v += 48; in MatMultAdd_SeqBAIJ_12_ver2()
1284 sum1 += v[0] * x1 + v[12] * x2 + v[24] * x3 + v[36] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1285 sum2 += v[1] * x1 + v[13] * x2 + v[25] * x3 + v[37] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1286 sum3 += v[2] * x1 + v[14] * x2 + v[26] * x3 + v[38] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1287 sum4 += v[3] * x1 + v[15] * x2 + v[27] * x3 + v[39] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1288 sum5 += v[4] * x1 + v[16] * x2 + v[28] * x3 + v[40] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1289 sum6 += v[5] * x1 + v[17] * x2 + v[29] * x3 + v[41] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1290 sum7 += v[6] * x1 + v[18] * x2 + v[30] * x3 + v[42] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1291 sum8 += v[7] * x1 + v[19] * x2 + v[31] * x3 + v[43] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1292 sum9 += v[8] * x1 + v[20] * x2 + v[32] * x3 + v[44] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1293 sum10 += v[9] * x1 + v[21] * x2 + v[33] * x3 + v[45] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1294 sum11 += v[10] * x1 + v[22] * x2 + v[34] * x3 + v[46] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1295 sum12 += v[11] * x1 + v[23] * x2 + v[35] * x3 + v[47] * x4; in MatMultAdd_SeqBAIJ_12_ver2()
1296 v += 48; in MatMultAdd_SeqBAIJ_12_ver2()
1327 const MatScalar *v = a->a; in MatMult_SeqBAIJ_12_AVX2() local
1364 a0 = _mm256_loadu_pd(v + 0); in MatMult_SeqBAIJ_12_AVX2()
1366 a1 = _mm256_loadu_pd(v + 4); in MatMult_SeqBAIJ_12_AVX2()
1368 a2 = _mm256_loadu_pd(v + 8); in MatMult_SeqBAIJ_12_AVX2()
1373 a3 = _mm256_loadu_pd(v + 12); in MatMult_SeqBAIJ_12_AVX2()
1375 a4 = _mm256_loadu_pd(v + 16); in MatMult_SeqBAIJ_12_AVX2()
1377 a5 = _mm256_loadu_pd(v + 20); in MatMult_SeqBAIJ_12_AVX2()
1382 a0 = _mm256_loadu_pd(v + 24); in MatMult_SeqBAIJ_12_AVX2()
1384 a1 = _mm256_loadu_pd(v + 28); in MatMult_SeqBAIJ_12_AVX2()
1386 a2 = _mm256_loadu_pd(v + 32); in MatMult_SeqBAIJ_12_AVX2()
1391 a3 = _mm256_loadu_pd(v + 36); in MatMult_SeqBAIJ_12_AVX2()
1393 a4 = _mm256_loadu_pd(v + 40); in MatMult_SeqBAIJ_12_AVX2()
1395 a5 = _mm256_loadu_pd(v + 44); in MatMult_SeqBAIJ_12_AVX2()
1400 a0 = _mm256_loadu_pd(v + 48); in MatMult_SeqBAIJ_12_AVX2()
1402 a1 = _mm256_loadu_pd(v + 52); in MatMult_SeqBAIJ_12_AVX2()
1404 a2 = _mm256_loadu_pd(v + 56); in MatMult_SeqBAIJ_12_AVX2()
1409 a3 = _mm256_loadu_pd(v + 60); in MatMult_SeqBAIJ_12_AVX2()
1411 a4 = _mm256_loadu_pd(v + 64); in MatMult_SeqBAIJ_12_AVX2()
1413 a5 = _mm256_loadu_pd(v + 68); in MatMult_SeqBAIJ_12_AVX2()
1418 a0 = _mm256_loadu_pd(v + 72); in MatMult_SeqBAIJ_12_AVX2()
1420 a1 = _mm256_loadu_pd(v + 76); in MatMult_SeqBAIJ_12_AVX2()
1422 a2 = _mm256_loadu_pd(v + 80); in MatMult_SeqBAIJ_12_AVX2()
1427 a3 = _mm256_loadu_pd(v + 84); in MatMult_SeqBAIJ_12_AVX2()
1429 a4 = _mm256_loadu_pd(v + 88); in MatMult_SeqBAIJ_12_AVX2()
1431 a5 = _mm256_loadu_pd(v + 92); in MatMult_SeqBAIJ_12_AVX2()
1436 a0 = _mm256_loadu_pd(v + 96); in MatMult_SeqBAIJ_12_AVX2()
1438 a1 = _mm256_loadu_pd(v + 100); in MatMult_SeqBAIJ_12_AVX2()
1440 a2 = _mm256_loadu_pd(v + 104); in MatMult_SeqBAIJ_12_AVX2()
1445 a3 = _mm256_loadu_pd(v + 108); in MatMult_SeqBAIJ_12_AVX2()
1447 a4 = _mm256_loadu_pd(v + 112); in MatMult_SeqBAIJ_12_AVX2()
1449 a5 = _mm256_loadu_pd(v + 116); in MatMult_SeqBAIJ_12_AVX2()
1454 a0 = _mm256_loadu_pd(v + 120); in MatMult_SeqBAIJ_12_AVX2()
1456 a1 = _mm256_loadu_pd(v + 124); in MatMult_SeqBAIJ_12_AVX2()
1458 a2 = _mm256_loadu_pd(v + 128); in MatMult_SeqBAIJ_12_AVX2()
1463 a3 = _mm256_loadu_pd(v + 132); in MatMult_SeqBAIJ_12_AVX2()
1465 a4 = _mm256_loadu_pd(v + 136); in MatMult_SeqBAIJ_12_AVX2()
1467 a5 = _mm256_loadu_pd(v + 140); in MatMult_SeqBAIJ_12_AVX2()
1470 v += bs2; in MatMult_SeqBAIJ_12_AVX2()
1493 const MatScalar *v; in MatMult_SeqBAIJ_15_ver1() local
1502 v = a->a; in MatMult_SeqBAIJ_15_ver1()
1538 sum1 += v[0] * xv; in MatMult_SeqBAIJ_15_ver1()
1539 sum2 += v[1] * xv; in MatMult_SeqBAIJ_15_ver1()
1540 sum3 += v[2] * xv; in MatMult_SeqBAIJ_15_ver1()
1541 sum4 += v[3] * xv; in MatMult_SeqBAIJ_15_ver1()
1542 sum5 += v[4] * xv; in MatMult_SeqBAIJ_15_ver1()
1543 sum6 += v[5] * xv; in MatMult_SeqBAIJ_15_ver1()
1544 sum7 += v[6] * xv; in MatMult_SeqBAIJ_15_ver1()
1545 sum8 += v[7] * xv; in MatMult_SeqBAIJ_15_ver1()
1546 sum9 += v[8] * xv; in MatMult_SeqBAIJ_15_ver1()
1547 sum10 += v[9] * xv; in MatMult_SeqBAIJ_15_ver1()
1548 sum11 += v[10] * xv; in MatMult_SeqBAIJ_15_ver1()
1549 sum12 += v[11] * xv; in MatMult_SeqBAIJ_15_ver1()
1550 sum13 += v[12] * xv; in MatMult_SeqBAIJ_15_ver1()
1551 sum14 += v[13] * xv; in MatMult_SeqBAIJ_15_ver1()
1552 sum15 += v[14] * xv; in MatMult_SeqBAIJ_15_ver1()
1553 v += 15; in MatMult_SeqBAIJ_15_ver1()
1589 const MatScalar *v; in MatMult_SeqBAIJ_15_ver2() local
1598 v = a->a; in MatMult_SeqBAIJ_15_ver2()
1636 sum1 += v[0] * x1 + v[15] * x2 + v[30] * x3 + v[45] * x4; in MatMult_SeqBAIJ_15_ver2()
1637 sum2 += v[1] * x1 + v[16] * x2 + v[31] * x3 + v[46] * x4; in MatMult_SeqBAIJ_15_ver2()
1638 sum3 += v[2] * x1 + v[17] * x2 + v[32] * x3 + v[47] * x4; in MatMult_SeqBAIJ_15_ver2()
1639 sum4 += v[3] * x1 + v[18] * x2 + v[33] * x3 + v[48] * x4; in MatMult_SeqBAIJ_15_ver2()
1640 sum5 += v[4] * x1 + v[19] * x2 + v[34] * x3 + v[49] * x4; in MatMult_SeqBAIJ_15_ver2()
1641 sum6 += v[5] * x1 + v[20] * x2 + v[35] * x3 + v[50] * x4; in MatMult_SeqBAIJ_15_ver2()
1642 sum7 += v[6] * x1 + v[21] * x2 + v[36] * x3 + v[51] * x4; in MatMult_SeqBAIJ_15_ver2()
1643 sum8 += v[7] * x1 + v[22] * x2 + v[37] * x3 + v[52] * x4; in MatMult_SeqBAIJ_15_ver2()
1644 sum9 += v[8] * x1 + v[23] * x2 + v[38] * x3 + v[53] * x4; in MatMult_SeqBAIJ_15_ver2()
1645 sum10 += v[9] * x1 + v[24] * x2 + v[39] * x3 + v[54] * x4; in MatMult_SeqBAIJ_15_ver2()
1646 sum11 += v[10] * x1 + v[25] * x2 + v[40] * x3 + v[55] * x4; in MatMult_SeqBAIJ_15_ver2()
1647 sum12 += v[11] * x1 + v[26] * x2 + v[41] * x3 + v[56] * x4; in MatMult_SeqBAIJ_15_ver2()
1648 sum13 += v[12] * x1 + v[27] * x2 + v[42] * x3 + v[57] * x4; in MatMult_SeqBAIJ_15_ver2()
1649 sum14 += v[13] * x1 + v[28] * x2 + v[43] * x3 + v[58] * x4; in MatMult_SeqBAIJ_15_ver2()
1650 sum15 += v[14] * x1 + v[29] * x2 + v[44] * x3 + v[59] * x4; in MatMult_SeqBAIJ_15_ver2()
1652 v += 60; in MatMult_SeqBAIJ_15_ver2()
1659 sum1 += v[0] * x1 + v[15] * x2 + v[30] * x3 + v[45] * x4; in MatMult_SeqBAIJ_15_ver2()
1660 sum2 += v[1] * x1 + v[16] * x2 + v[31] * x3 + v[46] * x4; in MatMult_SeqBAIJ_15_ver2()
1661 sum3 += v[2] * x1 + v[17] * x2 + v[32] * x3 + v[47] * x4; in MatMult_SeqBAIJ_15_ver2()
1662 sum4 += v[3] * x1 + v[18] * x2 + v[33] * x3 + v[48] * x4; in MatMult_SeqBAIJ_15_ver2()
1663 sum5 += v[4] * x1 + v[19] * x2 + v[34] * x3 + v[49] * x4; in MatMult_SeqBAIJ_15_ver2()
1664 sum6 += v[5] * x1 + v[20] * x2 + v[35] * x3 + v[50] * x4; in MatMult_SeqBAIJ_15_ver2()
1665 sum7 += v[6] * x1 + v[21] * x2 + v[36] * x3 + v[51] * x4; in MatMult_SeqBAIJ_15_ver2()
1666 sum8 += v[7] * x1 + v[22] * x2 + v[37] * x3 + v[52] * x4; in MatMult_SeqBAIJ_15_ver2()
1667 sum9 += v[8] * x1 + v[23] * x2 + v[38] * x3 + v[53] * x4; in MatMult_SeqBAIJ_15_ver2()
1668 sum10 += v[9] * x1 + v[24] * x2 + v[39] * x3 + v[54] * x4; in MatMult_SeqBAIJ_15_ver2()
1669 sum11 += v[10] * x1 + v[25] * x2 + v[40] * x3 + v[55] * x4; in MatMult_SeqBAIJ_15_ver2()
1670 sum12 += v[11] * x1 + v[26] * x2 + v[41] * x3 + v[56] * x4; in MatMult_SeqBAIJ_15_ver2()
1671 sum13 += v[12] * x1 + v[27] * x2 + v[42] * x3 + v[57] * x4; in MatMult_SeqBAIJ_15_ver2()
1672 sum14 += v[13] * x1 + v[28] * x2 + v[43] * x3 + v[58] * x4; in MatMult_SeqBAIJ_15_ver2()
1673 sum15 += v[14] * x1 + v[29] * x2 + v[44] * x3 + v[59] * x4; in MatMult_SeqBAIJ_15_ver2()
1674 v += 60; in MatMult_SeqBAIJ_15_ver2()
1680 sum1 += v[0] * x1 + v[15] * x2 + v[30] * x3 + v[45] * x4; in MatMult_SeqBAIJ_15_ver2()
1681 sum2 += v[1] * x1 + v[16] * x2 + v[31] * x3 + v[46] * x4; in MatMult_SeqBAIJ_15_ver2()
1682 sum3 += v[2] * x1 + v[17] * x2 + v[32] * x3 + v[47] * x4; in MatMult_SeqBAIJ_15_ver2()
1683 sum4 += v[3] * x1 + v[18] * x2 + v[33] * x3 + v[48] * x4; in MatMult_SeqBAIJ_15_ver2()
1684 sum5 += v[4] * x1 + v[19] * x2 + v[34] * x3 + v[49] * x4; in MatMult_SeqBAIJ_15_ver2()
1685 sum6 += v[5] * x1 + v[20] * x2 + v[35] * x3 + v[50] * x4; in MatMult_SeqBAIJ_15_ver2()
1686 sum7 += v[6] * x1 + v[21] * x2 + v[36] * x3 + v[51] * x4; in MatMult_SeqBAIJ_15_ver2()
1687 sum8 += v[7] * x1 + v[22] * x2 + v[37] * x3 + v[52] * x4; in MatMult_SeqBAIJ_15_ver2()
1688 sum9 += v[8] * x1 + v[23] * x2 + v[38] * x3 + v[53] * x4; in MatMult_SeqBAIJ_15_ver2()
1689 sum10 += v[9] * x1 + v[24] * x2 + v[39] * x3 + v[54] * x4; in MatMult_SeqBAIJ_15_ver2()
1690 sum11 += v[10] * x1 + v[25] * x2 + v[40] * x3 + v[55] * x4; in MatMult_SeqBAIJ_15_ver2()
1691 sum12 += v[11] * x1 + v[26] * x2 + v[41] * x3 + v[56] * x4; in MatMult_SeqBAIJ_15_ver2()
1692 sum13 += v[12] * x1 + v[27] * x2 + v[42] * x3 + v[57] * x4; in MatMult_SeqBAIJ_15_ver2()
1693 sum14 += v[13] * x1 + v[28] * x2 + v[43] * x3 + v[58] * x4; in MatMult_SeqBAIJ_15_ver2()
1694 sum15 += v[14] * x1 + v[29] * x2 + v[44] * x3 + v[59] * x4; in MatMult_SeqBAIJ_15_ver2()
1695 v += 60; in MatMult_SeqBAIJ_15_ver2()
1700 sum1 += v[0] * x1 + v[15] * x2 + v[30] * x3; in MatMult_SeqBAIJ_15_ver2()
1701 sum2 += v[1] * x1 + v[16] * x2 + v[31] * x3; in MatMult_SeqBAIJ_15_ver2()
1702 sum3 += v[2] * x1 + v[17] * x2 + v[32] * x3; in MatMult_SeqBAIJ_15_ver2()
1703 sum4 += v[3] * x1 + v[18] * x2 + v[33] * x3; in MatMult_SeqBAIJ_15_ver2()
1704 sum5 += v[4] * x1 + v[19] * x2 + v[34] * x3; in MatMult_SeqBAIJ_15_ver2()
1705 sum6 += v[5] * x1 + v[20] * x2 + v[35] * x3; in MatMult_SeqBAIJ_15_ver2()
1706 sum7 += v[6] * x1 + v[21] * x2 + v[36] * x3; in MatMult_SeqBAIJ_15_ver2()
1707 sum8 += v[7] * x1 + v[22] * x2 + v[37] * x3; in MatMult_SeqBAIJ_15_ver2()
1708 sum9 += v[8] * x1 + v[23] * x2 + v[38] * x3; in MatMult_SeqBAIJ_15_ver2()
1709 sum10 += v[9] * x1 + v[24] * x2 + v[39] * x3; in MatMult_SeqBAIJ_15_ver2()
1710 sum11 += v[10] * x1 + v[25] * x2 + v[40] * x3; in MatMult_SeqBAIJ_15_ver2()
1711 sum12 += v[11] * x1 + v[26] * x2 + v[41] * x3; in MatMult_SeqBAIJ_15_ver2()
1712 sum13 += v[12] * x1 + v[27] * x2 + v[42] * x3; in MatMult_SeqBAIJ_15_ver2()
1713 sum14 += v[13] * x1 + v[28] * x2 + v[43] * x3; in MatMult_SeqBAIJ_15_ver2()
1714 sum15 += v[14] * x1 + v[29] * x2 + v[44] * x3; in MatMult_SeqBAIJ_15_ver2()
1715 v += 45; in MatMult_SeqBAIJ_15_ver2()
1750 const MatScalar *v; in MatMult_SeqBAIJ_15_ver3() local
1759 v = a->a; in MatMult_SeqBAIJ_15_ver3()
1801 …sum1 += v[0] * x1 + v[15] * x2 + v[30] * x3 + v[45] * x4 + v[60] * x5 + v[75] * x6 + v[90] * x7 + in MatMult_SeqBAIJ_15_ver3()
1802 …sum2 += v[1] * x1 + v[16] * x2 + v[31] * x3 + v[46] * x4 + v[61] * x5 + v[76] * x6 + v[91] * x7 + in MatMult_SeqBAIJ_15_ver3()
1803 …sum3 += v[2] * x1 + v[17] * x2 + v[32] * x3 + v[47] * x4 + v[62] * x5 + v[77] * x6 + v[92] * x7 + in MatMult_SeqBAIJ_15_ver3()
1804 …sum4 += v[3] * x1 + v[18] * x2 + v[33] * x3 + v[48] * x4 + v[63] * x5 + v[78] * x6 + v[93] * x7 + in MatMult_SeqBAIJ_15_ver3()
1805 …sum5 += v[4] * x1 + v[19] * x2 + v[34] * x3 + v[49] * x4 + v[64] * x5 + v[79] * x6 + v[94] * x7 + in MatMult_SeqBAIJ_15_ver3()
1806 …sum6 += v[5] * x1 + v[20] * x2 + v[35] * x3 + v[50] * x4 + v[65] * x5 + v[80] * x6 + v[95] * x7 + in MatMult_SeqBAIJ_15_ver3()
1807 …sum7 += v[6] * x1 + v[21] * x2 + v[36] * x3 + v[51] * x4 + v[66] * x5 + v[81] * x6 + v[96] * x7 + in MatMult_SeqBAIJ_15_ver3()
1808 …sum8 += v[7] * x1 + v[22] * x2 + v[37] * x3 + v[52] * x4 + v[67] * x5 + v[82] * x6 + v[97] * x7 + in MatMult_SeqBAIJ_15_ver3()
1809 …sum9 += v[8] * x1 + v[23] * x2 + v[38] * x3 + v[53] * x4 + v[68] * x5 + v[83] * x6 + v[98] * x7 + in MatMult_SeqBAIJ_15_ver3()
1810 …sum10 += v[9] * x1 + v[24] * x2 + v[39] * x3 + v[54] * x4 + v[69] * x5 + v[84] * x6 + v[99] * x7 +… in MatMult_SeqBAIJ_15_ver3()
1811 …sum11 += v[10] * x1 + v[25] * x2 + v[40] * x3 + v[55] * x4 + v[70] * x5 + v[85] * x6 + v[100] * x7… in MatMult_SeqBAIJ_15_ver3()
1812 …sum12 += v[11] * x1 + v[26] * x2 + v[41] * x3 + v[56] * x4 + v[71] * x5 + v[86] * x6 + v[101] * x7… in MatMult_SeqBAIJ_15_ver3()
1813 …sum13 += v[12] * x1 + v[27] * x2 + v[42] * x3 + v[57] * x4 + v[72] * x5 + v[87] * x6 + v[102] * x7… in MatMult_SeqBAIJ_15_ver3()
1814 …sum14 += v[13] * x1 + v[28] * x2 + v[43] * x3 + v[58] * x4 + v[73] * x5 + v[88] * x6 + v[103] * x7… in MatMult_SeqBAIJ_15_ver3()
1815 …sum15 += v[14] * x1 + v[29] * x2 + v[44] * x3 + v[59] * x4 + v[74] * x5 + v[89] * x6 + v[104] * x7… in MatMult_SeqBAIJ_15_ver3()
1816 v += 120; in MatMult_SeqBAIJ_15_ver3()
1826 … sum1 += v[0] * x1 + v[15] * x2 + v[30] * x3 + v[45] * x4 + v[60] * x5 + v[75] * x6 + v[90] * x7; in MatMult_SeqBAIJ_15_ver3()
1827 … sum2 += v[1] * x1 + v[16] * x2 + v[31] * x3 + v[46] * x4 + v[61] * x5 + v[76] * x6 + v[91] * x7; in MatMult_SeqBAIJ_15_ver3()
1828 … sum3 += v[2] * x1 + v[17] * x2 + v[32] * x3 + v[47] * x4 + v[62] * x5 + v[77] * x6 + v[92] * x7; in MatMult_SeqBAIJ_15_ver3()
1829 … sum4 += v[3] * x1 + v[18] * x2 + v[33] * x3 + v[48] * x4 + v[63] * x5 + v[78] * x6 + v[93] * x7; in MatMult_SeqBAIJ_15_ver3()
1830 … sum5 += v[4] * x1 + v[19] * x2 + v[34] * x3 + v[49] * x4 + v[64] * x5 + v[79] * x6 + v[94] * x7; in MatMult_SeqBAIJ_15_ver3()
1831 … sum6 += v[5] * x1 + v[20] * x2 + v[35] * x3 + v[50] * x4 + v[65] * x5 + v[80] * x6 + v[95] * x7; in MatMult_SeqBAIJ_15_ver3()
1832 … sum7 += v[6] * x1 + v[21] * x2 + v[36] * x3 + v[51] * x4 + v[66] * x5 + v[81] * x6 + v[96] * x7; in MatMult_SeqBAIJ_15_ver3()
1833 … sum8 += v[7] * x1 + v[22] * x2 + v[37] * x3 + v[52] * x4 + v[67] * x5 + v[82] * x6 + v[97] * x7; in MatMult_SeqBAIJ_15_ver3()
1834 … sum9 += v[8] * x1 + v[23] * x2 + v[38] * x3 + v[53] * x4 + v[68] * x5 + v[83] * x6 + v[98] * x7; in MatMult_SeqBAIJ_15_ver3()
1835 … sum10 += v[9] * x1 + v[24] * x2 + v[39] * x3 + v[54] * x4 + v[69] * x5 + v[84] * x6 + v[99] * x7; in MatMult_SeqBAIJ_15_ver3()
1836 …sum11 += v[10] * x1 + v[25] * x2 + v[40] * x3 + v[55] * x4 + v[70] * x5 + v[85] * x6 + v[100] * x7; in MatMult_SeqBAIJ_15_ver3()
1837 …sum12 += v[11] * x1 + v[26] * x2 + v[41] * x3 + v[56] * x4 + v[71] * x5 + v[86] * x6 + v[101] * x7; in MatMult_SeqBAIJ_15_ver3()
1838 …sum13 += v[12] * x1 + v[27] * x2 + v[42] * x3 + v[57] * x4 + v[72] * x5 + v[87] * x6 + v[102] * x7; in MatMult_SeqBAIJ_15_ver3()
1839 …sum14 += v[13] * x1 + v[28] * x2 + v[43] * x3 + v[58] * x4 + v[73] * x5 + v[88] * x6 + v[103] * x7; in MatMult_SeqBAIJ_15_ver3()
1840 …sum15 += v[14] * x1 + v[29] * x2 + v[44] * x3 + v[59] * x4 + v[74] * x5 + v[89] * x6 + v[104] * x7; in MatMult_SeqBAIJ_15_ver3()
1841 v += 105; in MatMult_SeqBAIJ_15_ver3()
1876 const MatScalar *v; in MatMult_SeqBAIJ_15_ver4() local
1885 v = a->a; in MatMult_SeqBAIJ_15_ver4()
1934v[0] * x1 + v[15] * x2 + v[30] * x3 + v[45] * x4 + v[60] * x5 + v[75] * x6 + v[90] * x7 + v[105] *… in MatMult_SeqBAIJ_15_ver4()
1935v[1] * x1 + v[16] * x2 + v[31] * x3 + v[46] * x4 + v[61] * x5 + v[76] * x6 + v[91] * x7 + v[106] *… in MatMult_SeqBAIJ_15_ver4()
1936v[2] * x1 + v[17] * x2 + v[32] * x3 + v[47] * x4 + v[62] * x5 + v[77] * x6 + v[92] * x7 + v[107] *… in MatMult_SeqBAIJ_15_ver4()
1937v[3] * x1 + v[18] * x2 + v[33] * x3 + v[48] * x4 + v[63] * x5 + v[78] * x6 + v[93] * x7 + v[108] *… in MatMult_SeqBAIJ_15_ver4()
1938v[4] * x1 + v[19] * x2 + v[34] * x3 + v[49] * x4 + v[64] * x5 + v[79] * x6 + v[94] * x7 + v[109] *… in MatMult_SeqBAIJ_15_ver4()
1939v[5] * x1 + v[20] * x2 + v[35] * x3 + v[50] * x4 + v[65] * x5 + v[80] * x6 + v[95] * x7 + v[110] *… in MatMult_SeqBAIJ_15_ver4()
1940v[6] * x1 + v[21] * x2 + v[36] * x3 + v[51] * x4 + v[66] * x5 + v[81] * x6 + v[96] * x7 + v[111] *… in MatMult_SeqBAIJ_15_ver4()
1941v[7] * x1 + v[22] * x2 + v[37] * x3 + v[52] * x4 + v[67] * x5 + v[82] * x6 + v[97] * x7 + v[112] *… in MatMult_SeqBAIJ_15_ver4()
1942v[8] * x1 + v[23] * x2 + v[38] * x3 + v[53] * x4 + v[68] * x5 + v[83] * x6 + v[98] * x7 + v[113] *… in MatMult_SeqBAIJ_15_ver4()
1943v[9] * x1 + v[24] * x2 + v[39] * x3 + v[54] * x4 + v[69] * x5 + v[84] * x6 + v[99] * x7 + v[114] *… in MatMult_SeqBAIJ_15_ver4()
1944v[10] * x1 + v[25] * x2 + v[40] * x3 + v[55] * x4 + v[70] * x5 + v[85] * x6 + v[100] * x7 + v[115]… in MatMult_SeqBAIJ_15_ver4()
1945v[11] * x1 + v[26] * x2 + v[41] * x3 + v[56] * x4 + v[71] * x5 + v[86] * x6 + v[101] * x7 + v[116]… in MatMult_SeqBAIJ_15_ver4()
1946v[12] * x1 + v[27] * x2 + v[42] * x3 + v[57] * x4 + v[72] * x5 + v[87] * x6 + v[102] * x7 + v[117]… in MatMult_SeqBAIJ_15_ver4()
1947v[13] * x1 + v[28] * x2 + v[43] * x3 + v[58] * x4 + v[73] * x5 + v[88] * x6 + v[103] * x7 + v[118]… in MatMult_SeqBAIJ_15_ver4()
1948v[14] * x1 + v[29] * x2 + v[44] * x3 + v[59] * x4 + v[74] * x5 + v[89] * x6 + v[104] * x7 + v[119]… in MatMult_SeqBAIJ_15_ver4()
1949 v += 225; in MatMult_SeqBAIJ_15_ver4()
1985 const MatScalar *v; in MatMult_SeqBAIJ_N() local
1996 v = a->a; in MatMult_SeqBAIJ_N()
2024 PetscKernel_w_gets_Ar_times_v(bs, ncols, work, v, z); in MatMult_SeqBAIJ_N()
2025 v += n * bs2; in MatMult_SeqBAIJ_N()
2039 const MatScalar *v; in MatMultAdd_SeqBAIJ_1() local
2049 v = a->a; in MatMultAdd_SeqBAIJ_1()
2068 PetscPrefetchBlock(v + n, n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMultAdd_SeqBAIJ_1()
2069 PetscSparseDensePlusDot(sum, x, v, idx, n); in MatMultAdd_SeqBAIJ_1()
2070 v += n; in MatMultAdd_SeqBAIJ_1()
2090 const MatScalar *v; in MatMultAdd_SeqBAIJ_2() local
2100 v = a->a; in MatMultAdd_SeqBAIJ_2()
2122 PetscPrefetchBlock(v + 4 * n, 4 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMultAdd_SeqBAIJ_2()
2128 sum1 += v[0] * x1 + v[2] * x2; in MatMultAdd_SeqBAIJ_2()
2129 sum2 += v[1] * x1 + v[3] * x2; in MatMultAdd_SeqBAIJ_2()
2130 v += 4; in MatMultAdd_SeqBAIJ_2()
2150 const MatScalar *v; in MatMultAdd_SeqBAIJ_3() local
2160 v = a->a; in MatMultAdd_SeqBAIJ_3()
2183 PetscPrefetchBlock(v + 9 * n, 9 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMultAdd_SeqBAIJ_3()
2189 sum1 += v[0] * x1 + v[3] * x2 + v[6] * x3; in MatMultAdd_SeqBAIJ_3()
2190 sum2 += v[1] * x1 + v[4] * x2 + v[7] * x3; in MatMultAdd_SeqBAIJ_3()
2191 sum3 += v[2] * x1 + v[5] * x2 + v[8] * x3; in MatMultAdd_SeqBAIJ_3()
2192 v += 9; in MatMultAdd_SeqBAIJ_3()
2213 const MatScalar *v; in MatMultAdd_SeqBAIJ_4() local
2223 v = a->a; in MatMultAdd_SeqBAIJ_4()
2247 … PetscPrefetchBlock(v + 16 * n, 16 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMultAdd_SeqBAIJ_4()
2254 sum1 += v[0] * x1 + v[4] * x2 + v[8] * x3 + v[12] * x4; in MatMultAdd_SeqBAIJ_4()
2255 sum2 += v[1] * x1 + v[5] * x2 + v[9] * x3 + v[13] * x4; in MatMultAdd_SeqBAIJ_4()
2256 sum3 += v[2] * x1 + v[6] * x2 + v[10] * x3 + v[14] * x4; in MatMultAdd_SeqBAIJ_4()
2257 sum4 += v[3] * x1 + v[7] * x2 + v[11] * x3 + v[15] * x4; in MatMultAdd_SeqBAIJ_4()
2258 v += 16; in MatMultAdd_SeqBAIJ_4()
2281 const MatScalar *v; in MatMultAdd_SeqBAIJ_5() local
2291 v = a->a; in MatMultAdd_SeqBAIJ_5()
2316 … PetscPrefetchBlock(v + 25 * n, 25 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMultAdd_SeqBAIJ_5()
2324 sum1 += v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5; in MatMultAdd_SeqBAIJ_5()
2325 sum2 += v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5; in MatMultAdd_SeqBAIJ_5()
2326 sum3 += v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5; in MatMultAdd_SeqBAIJ_5()
2327 sum4 += v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5; in MatMultAdd_SeqBAIJ_5()
2328 sum5 += v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5; in MatMultAdd_SeqBAIJ_5()
2329 v += 25; in MatMultAdd_SeqBAIJ_5()
2353 const MatScalar *v; in MatMultAdd_SeqBAIJ_6() local
2363 v = a->a; in MatMultAdd_SeqBAIJ_6()
2389 … PetscPrefetchBlock(v + 36 * n, 36 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMultAdd_SeqBAIJ_6()
2398 sum1 += v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6; in MatMultAdd_SeqBAIJ_6()
2399 sum2 += v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6; in MatMultAdd_SeqBAIJ_6()
2400 sum3 += v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6; in MatMultAdd_SeqBAIJ_6()
2401 sum4 += v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6; in MatMultAdd_SeqBAIJ_6()
2402 sum5 += v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6; in MatMultAdd_SeqBAIJ_6()
2403 sum6 += v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6; in MatMultAdd_SeqBAIJ_6()
2404 v += 36; in MatMultAdd_SeqBAIJ_6()
2429 const MatScalar *v; in MatMultAdd_SeqBAIJ_7() local
2439 v = a->a; in MatMultAdd_SeqBAIJ_7()
2466 … PetscPrefetchBlock(v + 49 * n, 49 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMultAdd_SeqBAIJ_7()
2476 … sum1 += v[0] * x1 + v[7] * x2 + v[14] * x3 + v[21] * x4 + v[28] * x5 + v[35] * x6 + v[42] * x7; in MatMultAdd_SeqBAIJ_7()
2477 … sum2 += v[1] * x1 + v[8] * x2 + v[15] * x3 + v[22] * x4 + v[29] * x5 + v[36] * x6 + v[43] * x7; in MatMultAdd_SeqBAIJ_7()
2478 … sum3 += v[2] * x1 + v[9] * x2 + v[16] * x3 + v[23] * x4 + v[30] * x5 + v[37] * x6 + v[44] * x7; in MatMultAdd_SeqBAIJ_7()
2479 … sum4 += v[3] * x1 + v[10] * x2 + v[17] * x3 + v[24] * x4 + v[31] * x5 + v[38] * x6 + v[45] * x7; in MatMultAdd_SeqBAIJ_7()
2480 … sum5 += v[4] * x1 + v[11] * x2 + v[18] * x3 + v[25] * x4 + v[32] * x5 + v[39] * x6 + v[46] * x7; in MatMultAdd_SeqBAIJ_7()
2481 … sum6 += v[5] * x1 + v[12] * x2 + v[19] * x3 + v[26] * x4 + v[33] * x5 + v[40] * x6 + v[47] * x7; in MatMultAdd_SeqBAIJ_7()
2482 … sum7 += v[6] * x1 + v[13] * x2 + v[20] * x3 + v[27] * x4 + v[34] * x5 + v[41] * x6 + v[48] * x7; in MatMultAdd_SeqBAIJ_7()
2483 v += 49; in MatMultAdd_SeqBAIJ_7()
2509 const MatScalar *v; in MatMultAdd_SeqBAIJ_9_AVX2() local
2526 v = a->a; in MatMultAdd_SeqBAIJ_9_AVX2()
2561 a0 = _mm256_loadu_pd(&v[j * 81]); in MatMultAdd_SeqBAIJ_9_AVX2()
2563 a1 = _mm256_loadu_pd(&v[j * 81 + 4]); in MatMultAdd_SeqBAIJ_9_AVX2()
2565 a2 = _mm256_loadu_pd(&v[j * 81 + 8]); in MatMultAdd_SeqBAIJ_9_AVX2()
2570 a0 = _mm256_loadu_pd(&v[j * 81 + 9]); in MatMultAdd_SeqBAIJ_9_AVX2()
2572 a1 = _mm256_loadu_pd(&v[j * 81 + 13]); in MatMultAdd_SeqBAIJ_9_AVX2()
2574 a2 = _mm256_loadu_pd(&v[j * 81 + 17]); in MatMultAdd_SeqBAIJ_9_AVX2()
2579 a3 = _mm256_loadu_pd(&v[j * 81 + 18]); in MatMultAdd_SeqBAIJ_9_AVX2()
2581 a4 = _mm256_loadu_pd(&v[j * 81 + 22]); in MatMultAdd_SeqBAIJ_9_AVX2()
2583 a5 = _mm256_loadu_pd(&v[j * 81 + 26]); in MatMultAdd_SeqBAIJ_9_AVX2()
2588 a0 = _mm256_loadu_pd(&v[j * 81 + 27]); in MatMultAdd_SeqBAIJ_9_AVX2()
2590 a1 = _mm256_loadu_pd(&v[j * 81 + 31]); in MatMultAdd_SeqBAIJ_9_AVX2()
2592 a2 = _mm256_loadu_pd(&v[j * 81 + 35]); in MatMultAdd_SeqBAIJ_9_AVX2()
2597 a3 = _mm256_loadu_pd(&v[j * 81 + 36]); in MatMultAdd_SeqBAIJ_9_AVX2()
2599 a4 = _mm256_loadu_pd(&v[j * 81 + 40]); in MatMultAdd_SeqBAIJ_9_AVX2()
2601 a5 = _mm256_loadu_pd(&v[j * 81 + 44]); in MatMultAdd_SeqBAIJ_9_AVX2()
2606 a0 = _mm256_loadu_pd(&v[j * 81 + 45]); in MatMultAdd_SeqBAIJ_9_AVX2()
2608 a1 = _mm256_loadu_pd(&v[j * 81 + 49]); in MatMultAdd_SeqBAIJ_9_AVX2()
2610 a2 = _mm256_loadu_pd(&v[j * 81 + 53]); in MatMultAdd_SeqBAIJ_9_AVX2()
2615 a0 = _mm256_loadu_pd(&v[j * 81 + 54]); in MatMultAdd_SeqBAIJ_9_AVX2()
2617 a1 = _mm256_loadu_pd(&v[j * 81 + 58]); in MatMultAdd_SeqBAIJ_9_AVX2()
2619 a2 = _mm256_loadu_pd(&v[j * 81 + 62]); in MatMultAdd_SeqBAIJ_9_AVX2()
2624 a3 = _mm256_loadu_pd(&v[j * 81 + 63]); in MatMultAdd_SeqBAIJ_9_AVX2()
2626 a4 = _mm256_loadu_pd(&v[j * 81 + 67]); in MatMultAdd_SeqBAIJ_9_AVX2()
2628 a5 = _mm256_loadu_pd(&v[j * 81 + 71]); in MatMultAdd_SeqBAIJ_9_AVX2()
2633 a0 = _mm256_loadu_pd(&v[j * 81 + 72]); in MatMultAdd_SeqBAIJ_9_AVX2()
2635 a1 = _mm256_loadu_pd(&v[j * 81 + 76]); in MatMultAdd_SeqBAIJ_9_AVX2()
2637 a2 = _mm256_maskload_pd(&v[j * 81 + 80], mask1); in MatMultAdd_SeqBAIJ_9_AVX2()
2645 v += n * bs2; in MatMultAdd_SeqBAIJ_9_AVX2()
2661 const MatScalar *v; in MatMultAdd_SeqBAIJ_11() local
2671 v = a->a; in MatMultAdd_SeqBAIJ_11()
2702 …PetscPrefetchBlock(v + 121 * n, 121 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row … in MatMultAdd_SeqBAIJ_11()
2716v[0] * x1 + v[11] * x2 + v[2 * 11] * x3 + v[3 * 11] * x4 + v[4 * 11] * x5 + v[5 * 11] * x6 + v[6 *… in MatMultAdd_SeqBAIJ_11()
2717v[1 + 0] * x1 + v[1 + 11] * x2 + v[1 + 2 * 11] * x3 + v[1 + 3 * 11] * x4 + v[1 + 4 * 11] * x5 + v[… in MatMultAdd_SeqBAIJ_11()
2718v[2 + 0] * x1 + v[2 + 11] * x2 + v[2 + 2 * 11] * x3 + v[2 + 3 * 11] * x4 + v[2 + 4 * 11] * x5 + v[… in MatMultAdd_SeqBAIJ_11()
2719v[3 + 0] * x1 + v[3 + 11] * x2 + v[3 + 2 * 11] * x3 + v[3 + 3 * 11] * x4 + v[3 + 4 * 11] * x5 + v[… in MatMultAdd_SeqBAIJ_11()
2720v[4 + 0] * x1 + v[4 + 11] * x2 + v[4 + 2 * 11] * x3 + v[4 + 3 * 11] * x4 + v[4 + 4 * 11] * x5 + v[… in MatMultAdd_SeqBAIJ_11()
2721v[5 + 0] * x1 + v[5 + 11] * x2 + v[5 + 2 * 11] * x3 + v[5 + 3 * 11] * x4 + v[5 + 4 * 11] * x5 + v[… in MatMultAdd_SeqBAIJ_11()
2722v[6 + 0] * x1 + v[6 + 11] * x2 + v[6 + 2 * 11] * x3 + v[6 + 3 * 11] * x4 + v[6 + 4 * 11] * x5 + v[… in MatMultAdd_SeqBAIJ_11()
2723v[7 + 0] * x1 + v[7 + 11] * x2 + v[7 + 2 * 11] * x3 + v[7 + 3 * 11] * x4 + v[7 + 4 * 11] * x5 + v[… in MatMultAdd_SeqBAIJ_11()
2724v[8 + 0] * x1 + v[8 + 11] * x2 + v[8 + 2 * 11] * x3 + v[8 + 3 * 11] * x4 + v[8 + 4 * 11] * x5 + v[… in MatMultAdd_SeqBAIJ_11()
2725v[9 + 0] * x1 + v[9 + 11] * x2 + v[9 + 2 * 11] * x3 + v[9 + 3 * 11] * x4 + v[9 + 4 * 11] * x5 + v[… in MatMultAdd_SeqBAIJ_11()
2726v[10 + 0] * x1 + v[10 + 11] * x2 + v[10 + 2 * 11] * x3 + v[10 + 3 * 11] * x4 + v[10 + 4 * 11] * x5… in MatMultAdd_SeqBAIJ_11()
2727 v += 121; in MatMultAdd_SeqBAIJ_11()
2756 const MatScalar *v; in MatMultAdd_SeqBAIJ_N() local
2768 v = a->a; in MatMultAdd_SeqBAIJ_N()
2795 PetscKernel_w_gets_w_plus_Ar_times_v(bs, ncols, work, v, z); in MatMultAdd_SeqBAIJ_N()
2796 v += n * bs2; in MatMultAdd_SeqBAIJ_N()
2830 const MatScalar *v; in MatMultHermitianTransposeAdd_SeqBAIJ() local
2842 v = a->a; in MatMultHermitianTransposeAdd_SeqBAIJ()
2863 z[rval] += PetscConj(*v) * x1; in MatMultHermitianTransposeAdd_SeqBAIJ()
2864 v++; in MatMultHermitianTransposeAdd_SeqBAIJ()
2879 z[rval++] += PetscConj(v[0]) * x1 + PetscConj(v[1]) * x2; in MatMultHermitianTransposeAdd_SeqBAIJ()
2880 z[rval++] += PetscConj(v[2]) * x1 + PetscConj(v[3]) * x2; in MatMultHermitianTransposeAdd_SeqBAIJ()
2881 v += 4; in MatMultHermitianTransposeAdd_SeqBAIJ()
2897 z[rval++] += PetscConj(v[0]) * x1 + PetscConj(v[1]) * x2 + PetscConj(v[2]) * x3; in MatMultHermitianTransposeAdd_SeqBAIJ()
2898 z[rval++] += PetscConj(v[3]) * x1 + PetscConj(v[4]) * x2 + PetscConj(v[5]) * x3; in MatMultHermitianTransposeAdd_SeqBAIJ()
2899 z[rval++] += PetscConj(v[6]) * x1 + PetscConj(v[7]) * x2 + PetscConj(v[8]) * x3; in MatMultHermitianTransposeAdd_SeqBAIJ()
2900 v += 9; in MatMultHermitianTransposeAdd_SeqBAIJ()
2917 …z[rval++] += PetscConj(v[0]) * x1 + PetscConj(v[1]) * x2 + PetscConj(v[2]) * x3 + PetscConj(v[3]) … in MatMultHermitianTransposeAdd_SeqBAIJ()
2918 …z[rval++] += PetscConj(v[4]) * x1 + PetscConj(v[5]) * x2 + PetscConj(v[6]) * x3 + PetscConj(v[7]) … in MatMultHermitianTransposeAdd_SeqBAIJ()
2919 …z[rval++] += PetscConj(v[8]) * x1 + PetscConj(v[9]) * x2 + PetscConj(v[10]) * x3 + PetscConj(v[11]… in MatMultHermitianTransposeAdd_SeqBAIJ()
2920 …z[rval++] += PetscConj(v[12]) * x1 + PetscConj(v[13]) * x2 + PetscConj(v[14]) * x3 + PetscConj(v[1… in MatMultHermitianTransposeAdd_SeqBAIJ()
2921 v += 16; in MatMultHermitianTransposeAdd_SeqBAIJ()
2939 …z[rval++] += PetscConj(v[0]) * x1 + PetscConj(v[1]) * x2 + PetscConj(v[2]) * x3 + PetscConj(v[3]) … in MatMultHermitianTransposeAdd_SeqBAIJ()
2940 …z[rval++] += PetscConj(v[5]) * x1 + PetscConj(v[6]) * x2 + PetscConj(v[7]) * x3 + PetscConj(v[8]) … in MatMultHermitianTransposeAdd_SeqBAIJ()
2941 …z[rval++] += PetscConj(v[10]) * x1 + PetscConj(v[11]) * x2 + PetscConj(v[12]) * x3 + PetscConj(v[1… in MatMultHermitianTransposeAdd_SeqBAIJ()
2942 …z[rval++] += PetscConj(v[15]) * x1 + PetscConj(v[16]) * x2 + PetscConj(v[17]) * x3 + PetscConj(v[1… in MatMultHermitianTransposeAdd_SeqBAIJ()
2943 …z[rval++] += PetscConj(v[20]) * x1 + PetscConj(v[21]) * x2 + PetscConj(v[22]) * x3 + PetscConj(v[2… in MatMultHermitianTransposeAdd_SeqBAIJ()
2944 v += 25; in MatMultHermitianTransposeAdd_SeqBAIJ()
2967 PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work); in MatMultHermitianTransposeAdd_SeqBAIJ()
2968 v += n*bs2; in MatMultHermitianTransposeAdd_SeqBAIJ()
2991 const MatScalar *v; in MatMultTransposeAdd_SeqBAIJ() local
3003 v = a->a; in MatMultTransposeAdd_SeqBAIJ()
3024 z[rval] += *v * x1; in MatMultTransposeAdd_SeqBAIJ()
3025 v++; in MatMultTransposeAdd_SeqBAIJ()
3040 z[rval++] += v[0] * x1 + v[1] * x2; in MatMultTransposeAdd_SeqBAIJ()
3041 z[rval++] += v[2] * x1 + v[3] * x2; in MatMultTransposeAdd_SeqBAIJ()
3042 v += 4; in MatMultTransposeAdd_SeqBAIJ()
3058 z[rval++] += v[0] * x1 + v[1] * x2 + v[2] * x3; in MatMultTransposeAdd_SeqBAIJ()
3059 z[rval++] += v[3] * x1 + v[4] * x2 + v[5] * x3; in MatMultTransposeAdd_SeqBAIJ()
3060 z[rval++] += v[6] * x1 + v[7] * x2 + v[8] * x3; in MatMultTransposeAdd_SeqBAIJ()
3061 v += 9; in MatMultTransposeAdd_SeqBAIJ()
3078 z[rval++] += v[0] * x1 + v[1] * x2 + v[2] * x3 + v[3] * x4; in MatMultTransposeAdd_SeqBAIJ()
3079 z[rval++] += v[4] * x1 + v[5] * x2 + v[6] * x3 + v[7] * x4; in MatMultTransposeAdd_SeqBAIJ()
3080 z[rval++] += v[8] * x1 + v[9] * x2 + v[10] * x3 + v[11] * x4; in MatMultTransposeAdd_SeqBAIJ()
3081 z[rval++] += v[12] * x1 + v[13] * x2 + v[14] * x3 + v[15] * x4; in MatMultTransposeAdd_SeqBAIJ()
3082 v += 16; in MatMultTransposeAdd_SeqBAIJ()
3100 z[rval++] += v[0] * x1 + v[1] * x2 + v[2] * x3 + v[3] * x4 + v[4] * x5; in MatMultTransposeAdd_SeqBAIJ()
3101 z[rval++] += v[5] * x1 + v[6] * x2 + v[7] * x3 + v[8] * x4 + v[9] * x5; in MatMultTransposeAdd_SeqBAIJ()
3102 z[rval++] += v[10] * x1 + v[11] * x2 + v[12] * x3 + v[13] * x4 + v[14] * x5; in MatMultTransposeAdd_SeqBAIJ()
3103 z[rval++] += v[15] * x1 + v[16] * x2 + v[17] * x3 + v[18] * x4 + v[19] * x5; in MatMultTransposeAdd_SeqBAIJ()
3104 z[rval++] += v[20] * x1 + v[21] * x2 + v[22] * x3 + v[23] * x4 + v[24] * x5; in MatMultTransposeAdd_SeqBAIJ()
3105 v += 25; in MatMultTransposeAdd_SeqBAIJ()
3126 PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs, ncols, xtmp, v, work); in MatMultTransposeAdd_SeqBAIJ()
3127 v += n * bs2; in MatMultTransposeAdd_SeqBAIJ()
3161 MatScalar *v = a->a; in MatNorm_SeqBAIJ() local
3169 PetscCallBLAS("BLASnrm2", *norm = BLASnrm2_(&cnt, v, &one)); in MatNorm_SeqBAIJ()
3172 sum += PetscRealPart(PetscConj(*v) * (*v)); in MatNorm_SeqBAIJ()
3173 v++; in MatNorm_SeqBAIJ()
3186 tmp[k1] += PetscAbsScalar(*v); in MatNorm_SeqBAIJ()
3187 v++; in MatNorm_SeqBAIJ()
3202 v = a->a + bs2 * a->i[j] + k; in MatNorm_SeqBAIJ()
3206 sum += PetscAbsScalar(*v); in MatNorm_SeqBAIJ()
3207 v += bs; in MatNorm_SeqBAIJ()
3218 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A, Vec v) in MatGetDiagonal_SeqBAIJ() argument
3231 PetscCall(VecGetLocalSize(v, &n)); in MatGetDiagonal_SeqBAIJ()
3233 PetscCall(VecGetArrayWrite(v, &x)); in MatGetDiagonal_SeqBAIJ()
3252 PetscCall(VecRestoreArrayWrite(v, &x)); in MatGetDiagonal_SeqBAIJ()
3261 MatScalar *aa, *v; in MatDiagonalScale_SeqBAIJ() local
3281 v = PetscSafePointerPlusOffset(aa, bs2 * ai[i]); in MatDiagonalScale_SeqBAIJ()
3283 for (k = 0; k < bs2; k++) (*v++) *= li[k % bs]; in MatDiagonalScale_SeqBAIJ()
3297 v = PetscSafePointerPlusOffset(aa, bs2 * iai); in MatDiagonalScale_SeqBAIJ()
3302 for (tmp = 0; tmp < bs; tmp++) v[tmp] *= x; in MatDiagonalScale_SeqBAIJ()
3303 v += bs; in MatDiagonalScale_SeqBAIJ()
3368 const MatScalar *v, *vv; in MatMatMult_SeqBAIJ_1_Private() local
3374 v = a->a; in MatMatMult_SeqBAIJ_1_Private()
3389 PetscPrefetchBlock(v + n, n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMatMult_SeqBAIJ_1_Private()
3392 vv = v; in MatMatMult_SeqBAIJ_1_Private()
3395 v = vv; in MatMatMult_SeqBAIJ_1_Private()
3400 sum1 += v[0] * x1; in MatMatMult_SeqBAIJ_1_Private()
3401 v += 1; in MatMatMult_SeqBAIJ_1_Private()
3416 const MatScalar *v, *vv; in MatMatMult_SeqBAIJ_2_Private() local
3422 v = a->a; in MatMatMult_SeqBAIJ_2_Private()
3437 PetscPrefetchBlock(v + 4 * n, 4 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMatMult_SeqBAIJ_2_Private()
3440 vv = v; in MatMatMult_SeqBAIJ_2_Private()
3443 v = vv; in MatMatMult_SeqBAIJ_2_Private()
3450 sum1 += v[0] * x1 + v[2] * x2; in MatMatMult_SeqBAIJ_2_Private()
3451 sum2 += v[1] * x1 + v[3] * x2; in MatMatMult_SeqBAIJ_2_Private()
3452 v += 4; in MatMatMult_SeqBAIJ_2_Private()
3468 const MatScalar *v, *vv; in MatMatMult_SeqBAIJ_3_Private() local
3474 v = a->a; in MatMatMult_SeqBAIJ_3_Private()
3489 PetscPrefetchBlock(v + 9 * n, 9 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMatMult_SeqBAIJ_3_Private()
3492 vv = v; in MatMatMult_SeqBAIJ_3_Private()
3495 v = vv; in MatMatMult_SeqBAIJ_3_Private()
3504 sum1 += v[0] * x1 + v[3] * x2 + v[6] * x3; in MatMatMult_SeqBAIJ_3_Private()
3505 sum2 += v[1] * x1 + v[4] * x2 + v[7] * x3; in MatMatMult_SeqBAIJ_3_Private()
3506 sum3 += v[2] * x1 + v[5] * x2 + v[8] * x3; in MatMatMult_SeqBAIJ_3_Private()
3507 v += 9; in MatMatMult_SeqBAIJ_3_Private()
3524 const MatScalar *v, *vv; in MatMatMult_SeqBAIJ_4_Private() local
3530 v = a->a; in MatMatMult_SeqBAIJ_4_Private()
3545 … PetscPrefetchBlock(v + 16 * n, 16 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMatMult_SeqBAIJ_4_Private()
3548 vv = v; in MatMatMult_SeqBAIJ_4_Private()
3551 v = vv; in MatMatMult_SeqBAIJ_4_Private()
3562 sum1 += v[0] * x1 + v[4] * x2 + v[8] * x3 + v[12] * x4; in MatMatMult_SeqBAIJ_4_Private()
3563 sum2 += v[1] * x1 + v[5] * x2 + v[9] * x3 + v[13] * x4; in MatMatMult_SeqBAIJ_4_Private()
3564 sum3 += v[2] * x1 + v[6] * x2 + v[10] * x3 + v[14] * x4; in MatMatMult_SeqBAIJ_4_Private()
3565 sum4 += v[3] * x1 + v[7] * x2 + v[11] * x3 + v[15] * x4; in MatMatMult_SeqBAIJ_4_Private()
3566 v += 16; in MatMatMult_SeqBAIJ_4_Private()
3584 const MatScalar *v, *vv; in MatMatMult_SeqBAIJ_5_Private() local
3590 v = a->a; in MatMatMult_SeqBAIJ_5_Private()
3605 … PetscPrefetchBlock(v + 25 * n, 25 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatMatMult_SeqBAIJ_5_Private()
3608 vv = v; in MatMatMult_SeqBAIJ_5_Private()
3611 v = vv; in MatMatMult_SeqBAIJ_5_Private()
3624 sum1 += v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5; in MatMatMult_SeqBAIJ_5_Private()
3625 sum2 += v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5; in MatMatMult_SeqBAIJ_5_Private()
3626 sum3 += v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5; in MatMatMult_SeqBAIJ_5_Private()
3627 sum4 += v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5; in MatMatMult_SeqBAIJ_5_Private()
3628 sum5 += v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5; in MatMatMult_SeqBAIJ_5_Private()
3629 v += 25; in MatMatMult_SeqBAIJ_5_Private()
3652 const MatScalar *v; in MatMatMultNumeric_SeqBAIJ_SeqDense() local
3662 b = bd->v; in MatMatMultNumeric_SeqBAIJ_SeqDense()
3687 v = a->a; in MatMatMultNumeric_SeqBAIJ_SeqDense()
3702 …PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &bbs, &bcn, &bbs, &_DOne, v, &bbs, b + bs * (*idx++)… in MatMatMultNumeric_SeqBAIJ_SeqDense()
3703 v += bs2; in MatMatMultNumeric_SeqBAIJ_SeqDense()
3706 …PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &bbs, &bcn, &bbs, &_DOne, v, &bbs, b + bs * (*idx++)… in MatMatMultNumeric_SeqBAIJ_SeqDense()
3707 v += bs2; in MatMatMultNumeric_SeqBAIJ_SeqDense()
3720 const MatScalar *v; in MatTransposeMatMult_SeqBAIJ_1_Private() local
3727 v = a->a; in MatTransposeMatMult_SeqBAIJ_1_Private()
3743 PetscPrefetchBlock(v + 1 * n, 1 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatTransposeMatMult_SeqBAIJ_1_Private()
3747 for (k = 0; k < cn; k++) zcol[0 + k * cm] += v[0] * bi[k * bm]; in MatTransposeMatMult_SeqBAIJ_1_Private()
3748 ++v; in MatTransposeMatMult_SeqBAIJ_1_Private()
3757 const MatScalar *v; in MatTransposeMatMult_SeqBAIJ_2_Private() local
3765 v = a->a; in MatTransposeMatMult_SeqBAIJ_2_Private()
3781 PetscPrefetchBlock(v + 4 * n, 4 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatTransposeMatMult_SeqBAIJ_2_Private()
3788 zcol[0 + k * cm] += v[0] * x1 + v[1] * x2; in MatTransposeMatMult_SeqBAIJ_2_Private()
3789 zcol[1 + k * cm] += v[2] * x1 + v[3] * x2; in MatTransposeMatMult_SeqBAIJ_2_Private()
3791 v += 4; in MatTransposeMatMult_SeqBAIJ_2_Private()
3800 const MatScalar *v; in MatTransposeMatMult_SeqBAIJ_3_Private() local
3808 v = a->a; in MatTransposeMatMult_SeqBAIJ_3_Private()
3824 PetscPrefetchBlock(v + 9 * n, 9 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatTransposeMatMult_SeqBAIJ_3_Private()
3832 zcol[0 + k * cm] += v[0] * x1 + v[1] * x2 + v[2] * x3; in MatTransposeMatMult_SeqBAIJ_3_Private()
3833 zcol[1 + k * cm] += v[3] * x1 + v[4] * x2 + v[5] * x3; in MatTransposeMatMult_SeqBAIJ_3_Private()
3834 zcol[2 + k * cm] += v[6] * x1 + v[7] * x2 + v[8] * x3; in MatTransposeMatMult_SeqBAIJ_3_Private()
3836 v += 9; in MatTransposeMatMult_SeqBAIJ_3_Private()
3845 const MatScalar *v; in MatTransposeMatMult_SeqBAIJ_4_Private() local
3853 v = a->a; in MatTransposeMatMult_SeqBAIJ_4_Private()
3869 … PetscPrefetchBlock(v + 16 * n, 16 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatTransposeMatMult_SeqBAIJ_4_Private()
3878 zcol[0 + k * cm] += v[0] * x1 + v[1] * x2 + v[2] * x3 + v[3] * x4; in MatTransposeMatMult_SeqBAIJ_4_Private()
3879 zcol[1 + k * cm] += v[4] * x1 + v[5] * x2 + v[6] * x3 + v[7] * x4; in MatTransposeMatMult_SeqBAIJ_4_Private()
3880 zcol[2 + k * cm] += v[8] * x1 + v[9] * x2 + v[10] * x3 + v[11] * x4; in MatTransposeMatMult_SeqBAIJ_4_Private()
3881 zcol[3 + k * cm] += v[12] * x1 + v[13] * x2 + v[14] * x3 + v[15] * x4; in MatTransposeMatMult_SeqBAIJ_4_Private()
3883 v += 16; in MatTransposeMatMult_SeqBAIJ_4_Private()
3892 const MatScalar *v; in MatTransposeMatMult_SeqBAIJ_5_Private() local
3900 v = a->a; in MatTransposeMatMult_SeqBAIJ_5_Private()
3916 … PetscPrefetchBlock(v + 25 * n, 25 * n, 0, PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ in MatTransposeMatMult_SeqBAIJ_5_Private()
3926 zcol[0 + k * cm] += v[0] * x1 + v[1] * x2 + v[2] * x3 + v[3] * x4 + v[4] * x5; in MatTransposeMatMult_SeqBAIJ_5_Private()
3927 zcol[1 + k * cm] += v[5] * x1 + v[6] * x2 + v[7] * x3 + v[8] * x4 + v[9] * x5; in MatTransposeMatMult_SeqBAIJ_5_Private()
3928 zcol[2 + k * cm] += v[10] * x1 + v[11] * x2 + v[12] * x3 + v[13] * x4 + v[14] * x5; in MatTransposeMatMult_SeqBAIJ_5_Private()
3929 zcol[3 + k * cm] += v[15] * x1 + v[16] * x2 + v[17] * x3 + v[18] * x4 + v[19] * x5; in MatTransposeMatMult_SeqBAIJ_5_Private()
3930 zcol[4 + k * cm] += v[20] * x1 + v[21] * x2 + v[22] * x3 + v[23] * x4 + v[24] * x5; in MatTransposeMatMult_SeqBAIJ_5_Private()
3932 v += 25; in MatTransposeMatMult_SeqBAIJ_5_Private()
3947 const MatScalar *v; in MatTransposeMatMultNumeric_SeqBAIJ_SeqDense() local
3957 b = bd->v; in MatTransposeMatMultNumeric_SeqBAIJ_SeqDense()
3982 v = a->a; in MatTransposeMatMultNumeric_SeqBAIJ_SeqDense()
3997 …PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &bbs, &bcn, &bbs, &_DOne, v, &bbs, bi, &bbm, &_DOne,… in MatTransposeMatMultNumeric_SeqBAIJ_SeqDense()
3998 v += bs2; in MatTransposeMatMultNumeric_SeqBAIJ_SeqDense()