1 static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n" 2 "This test can only be run in parallel.\n" 3 "\n"; 4 5 #include <petscmat.h> 6 7 int main(int argc, char **args) { 8 Mat A, *submats; 9 MPI_Comm subcomm; 10 PetscMPIInt rank, size, subrank, subsize, color; 11 PetscInt m, n, N, bs, rstart, rend, i, j, k, total_subdomains, hash, nsubdomains = 1; 12 PetscInt nis, *cols, gnsubdomains, gsubdomainnums[1], gsubdomainperm[1], s, gs; 13 PetscInt *rowindices, *colindices, idx, rep; 14 PetscScalar *vals; 15 IS rowis[1], colis[1]; 16 PetscViewer viewer; 17 PetscBool permute_indices, flg; 18 19 PetscFunctionBeginUser; 20 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 21 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 22 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 23 24 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "ex183", "Mat"); 25 m = 5; 26 PetscCall(PetscOptionsInt("-m", "Local matrix size", "MatSetSizes", m, &m, &flg)); 27 total_subdomains = size - 1; 28 PetscCall(PetscOptionsInt("-total_subdomains", "Number of submatrices where 0 < n < comm size", "MatCreateSubMatricesMPI", total_subdomains, &total_subdomains, &flg)); 29 permute_indices = PETSC_FALSE; 30 PetscCall(PetscOptionsBool("-permute_indices", "Whether to permute indices before breaking them into subdomains", "ISCreateGeneral", permute_indices, &permute_indices, &flg)); 31 hash = 7; 32 PetscCall(PetscOptionsInt("-hash", "Permutation factor, which has to be relatively prime to M = size*m (total matrix size)", "ISCreateGeneral", hash, &hash, &flg)); 33 rep = 2; 34 PetscCall(PetscOptionsInt("-rep", "Number of times to carry out submatrix extractions; currently only 1 & 2 are supported", NULL, rep, &rep, &flg)); 35 PetscOptionsEnd(); 36 37 PetscCheck(total_subdomains <= size, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Number of subdomains %" PetscInt_FMT " must not exceed comm size %d", total_subdomains, size); 38 PetscCheck(total_subdomains >= 1 && total_subdomains <= size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "number of subdomains must be > 0 and <= %d (comm size), got total_subdomains = %" PetscInt_FMT, size, total_subdomains); 39 PetscCheck(rep == 1 || rep == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid number of test repetitions: %" PetscInt_FMT "; must be 1 or 2", rep); 40 41 viewer = PETSC_VIEWER_STDOUT_WORLD; 42 /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */ 43 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 44 PetscCall(MatSetSizes(A, m, m, PETSC_DECIDE, PETSC_DECIDE)); 45 PetscCall(MatSetFromOptions(A)); 46 PetscCall(MatSetUp(A)); 47 PetscCall(MatGetSize(A, NULL, &N)); 48 PetscCall(MatGetLocalSize(A, NULL, &n)); 49 PetscCall(MatGetBlockSize(A, &bs)); 50 PetscCall(MatSeqAIJSetPreallocation(A, n, NULL)); 51 PetscCall(MatMPIAIJSetPreallocation(A, n, NULL, N - n, NULL)); 52 PetscCall(MatSeqBAIJSetPreallocation(A, bs, n / bs, NULL)); 53 PetscCall(MatMPIBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL)); 54 PetscCall(MatSeqSBAIJSetPreallocation(A, bs, n / bs, NULL)); 55 PetscCall(MatMPISBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL)); 56 57 PetscCall(PetscMalloc2(N, &cols, N, &vals)); 58 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 59 for (j = 0; j < N; ++j) cols[j] = j; 60 for (i = rstart; i < rend; i++) { 61 for (j = 0; j < N; ++j) { vals[j] = i * 10000 + j; } 62 PetscCall(MatSetValues(A, 1, &i, N, cols, vals, INSERT_VALUES)); 63 } 64 PetscCall(PetscFree2(cols, vals)); 65 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 66 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 67 68 PetscCall(PetscViewerASCIIPrintf(viewer, "Initial matrix:\n")); 69 PetscCall(MatView(A, viewer)); 70 71 /* 72 Create subcomms and ISs so that each rank participates in one IS. 73 The IS either coalesces adjacent rank indices (contiguous), 74 or selects indices by scrambling them using a hash. 75 */ 76 k = size / total_subdomains + (size % total_subdomains > 0); /* There are up to k ranks to a color */ 77 color = rank / k; 78 PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD, color, rank, &subcomm)); 79 PetscCallMPI(MPI_Comm_size(subcomm, &subsize)); 80 PetscCallMPI(MPI_Comm_rank(subcomm, &subrank)); 81 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 82 nis = 1; 83 PetscCall(PetscMalloc2(rend - rstart, &rowindices, rend - rstart, &colindices)); 84 85 for (j = rstart; j < rend; ++j) { 86 if (permute_indices) { 87 idx = (j * hash); 88 } else { 89 idx = j; 90 } 91 rowindices[j - rstart] = idx % N; 92 colindices[j - rstart] = (idx + m) % N; 93 } 94 PetscCall(ISCreateGeneral(subcomm, rend - rstart, rowindices, PETSC_COPY_VALUES, &rowis[0])); 95 PetscCall(ISCreateGeneral(subcomm, rend - rstart, colindices, PETSC_COPY_VALUES, &colis[0])); 96 PetscCall(ISSort(rowis[0])); 97 PetscCall(ISSort(colis[0])); 98 PetscCall(PetscFree2(rowindices, colindices)); 99 /* 100 Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms, 101 we need to obtain the global numbers of our local objects and wait for the corresponding global 102 number to be viewed. 103 */ 104 PetscCall(PetscViewerASCIIPrintf(viewer, "Subdomains")); 105 if (permute_indices) { PetscCall(PetscViewerASCIIPrintf(viewer, " (hash=%" PetscInt_FMT ")", hash)); } 106 PetscCall(PetscViewerASCIIPrintf(viewer, ":\n")); 107 PetscCall(PetscViewerFlush(viewer)); 108 109 nsubdomains = 1; 110 for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 111 PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)rowis, &gnsubdomains, gsubdomainnums)); 112 PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 113 for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 114 if (s < nsubdomains) { 115 PetscInt ss; 116 ss = gsubdomainperm[s]; 117 if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 118 PetscViewer subviewer = NULL; 119 PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 120 PetscCall(PetscViewerASCIIPrintf(subviewer, "Row IS %" PetscInt_FMT "\n", gs)); 121 PetscCall(ISView(rowis[ss], subviewer)); 122 PetscCall(PetscViewerFlush(subviewer)); 123 PetscCall(PetscViewerASCIIPrintf(subviewer, "Col IS %" PetscInt_FMT "\n", gs)); 124 PetscCall(ISView(colis[ss], subviewer)); 125 PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 126 ++s; 127 } 128 } 129 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 130 } 131 PetscCall(PetscViewerFlush(viewer)); 132 PetscCall(ISSort(rowis[0])); 133 PetscCall(ISSort(colis[0])); 134 nsubdomains = 1; 135 PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_INITIAL_MATRIX, &submats)); 136 /* 137 Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 138 we need to obtain the global numbers of our local objects and wait for the corresponding global 139 number to be viewed. 140 */ 141 PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 1):\n")); 142 for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 143 PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums)); 144 PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 145 for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 146 if (s < nsubdomains) { 147 PetscInt ss; 148 ss = gsubdomainperm[s]; 149 if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 150 PetscViewer subviewer = NULL; 151 PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 152 PetscCall(MatView(submats[ss], subviewer)); 153 PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 154 ++s; 155 } 156 } 157 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 158 } 159 PetscCall(PetscViewerFlush(viewer)); 160 if (rep == 1) goto cleanup; 161 nsubdomains = 1; 162 PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_REUSE_MATRIX, &submats)); 163 /* 164 Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 165 we need to obtain the global numbers of our local objects and wait for the corresponding global 166 number to be viewed. 167 */ 168 PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 2):\n")); 169 for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 170 PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums)); 171 PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 172 for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 173 if (s < nsubdomains) { 174 PetscInt ss; 175 ss = gsubdomainperm[s]; 176 if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 177 PetscViewer subviewer = NULL; 178 PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 179 PetscCall(MatView(submats[ss], subviewer)); 180 PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 181 ++s; 182 } 183 } 184 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 185 } 186 PetscCall(PetscViewerFlush(viewer)); 187 cleanup: 188 for (k = 0; k < nsubdomains; ++k) { PetscCall(MatDestroy(submats + k)); } 189 PetscCall(PetscFree(submats)); 190 for (k = 0; k < nis; ++k) { 191 PetscCall(ISDestroy(rowis + k)); 192 PetscCall(ISDestroy(colis + k)); 193 } 194 PetscCall(MatDestroy(&A)); 195 PetscCallMPI(MPI_Comm_free(&subcomm)); 196 PetscCall(PetscFinalize()); 197 return 0; 198 } 199 200 /*TEST 201 202 test: 203 nsize: 2 204 args: -total_subdomains 1 205 output_file: output/ex183_2_1.out 206 207 test: 208 suffix: 2 209 nsize: 3 210 args: -total_subdomains 2 211 output_file: output/ex183_3_2.out 212 213 test: 214 suffix: 3 215 nsize: 4 216 args: -total_subdomains 2 217 output_file: output/ex183_4_2.out 218 219 test: 220 suffix: 4 221 nsize: 6 222 args: -total_subdomains 2 223 output_file: output/ex183_6_2.out 224 225 TEST*/ 226