/* Help text printed by PETSc when the program is run with -help */
static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\
Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n";
3
4 /*
5 Example:
6 mpiexec -n <np> ./ex33 -mem_view -matproduct_batch_size <Bbn>
7 */
8
9 #include <petsc.h>
10
Print_memory(PetscLogDouble mem)11 PetscErrorCode Print_memory(PetscLogDouble mem)
12 {
13 double max_mem, min_mem;
14
15 PetscFunctionBeginUser;
16 PetscCallMPI(MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD));
17 PetscCallMPI(MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD));
18 max_mem = max_mem / 1024.0 / 1024.0;
19 min_mem = min_mem / 1024.0 / 1024.0;
20 PetscCall(PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f Mb, %.4f Mb.\n", (double)max_mem, (double)min_mem));
21 PetscFunctionReturn(PETSC_SUCCESS);
22 }
23
24 /*
25 Illustrate how to use MPI derived data types.
26 It would save memory significantly. See MatMPIDenseScatter()
27 */
TestMPIDerivedDataType()28 PetscErrorCode TestMPIDerivedDataType()
29 {
30 MPI_Datatype type1, type2, rtype1, rtype2;
31 PetscInt i, j;
32 PetscScalar buffer[24]; /* An array of 4 rows, 6 cols */
33 MPI_Status status;
34 PetscMPIInt rank, size, disp[2];
35
36 PetscFunctionBeginUser;
37 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
38 PetscCheck(size >= 2, PETSC_COMM_SELF, PETSC_ERR_WRONG_MPI_SIZE, "Must use at least 2 processors");
39 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
40
41 if (rank == 0) {
42 /* proc[0] sends 2 rows to proc[1] */
43 for (i = 0; i < 24; i++) buffer[i] = (PetscScalar)i;
44
45 disp[0] = 0;
46 disp[1] = 2;
47 PetscCallMPI(MPI_Type_create_indexed_block(2, 1, (const PetscMPIInt *)disp, MPIU_SCALAR, &type1));
48 /* one column has 4 entries */
49 PetscCallMPI(MPI_Type_create_resized(type1, 0, 4 * sizeof(PetscScalar), &type2));
50 PetscCallMPI(MPI_Type_commit(&type2));
51 PetscCallMPI(MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD));
52
53 } else if (rank == 1) {
54 /* proc[1] receives 2 rows from proc[0], and put them into contiguous rows, starting at the row 1 (disp[0]) */
55 for (i = 0; i < 24; i++) buffer[i] = 0.0;
56
57 disp[0] = 1;
58 PetscCallMPI(MPI_Type_create_indexed_block(1, 2, (const PetscMPIInt *)disp, MPIU_SCALAR, &rtype1));
59 PetscCallMPI(MPI_Type_create_resized(rtype1, 0, 4 * sizeof(PetscScalar), &rtype2));
60
61 PetscCallMPI(MPI_Type_commit(&rtype2));
62 PetscCallMPI(MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status));
63 for (i = 0; i < 4; i++) {
64 for (j = 0; j < 6; j++) PetscCall(PetscPrintf(MPI_COMM_SELF, " %g", (double)PetscRealPart(buffer[i + j * 4])));
65 PetscCall(PetscPrintf(MPI_COMM_SELF, "\n"));
66 }
67 }
68
69 if (rank == 0) {
70 PetscCallMPI(MPI_Type_free(&type1));
71 PetscCallMPI(MPI_Type_free(&type2));
72 } else if (rank == 1) {
73 PetscCallMPI(MPI_Type_free(&rtype1));
74 PetscCallMPI(MPI_Type_free(&rtype2));
75 }
76 PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
77 PetscFunctionReturn(PETSC_SUCCESS);
78 }
79
main(int argc,char ** args)80 int main(int argc, char **args)
81 {
82 PetscInt mA = 2700, nX = 80, nz = 40;
83 /* PetscInt mA=6,nX=5,nz=2; //small test */
84 PetscLogDouble mem;
85 Mat A, X, Y;
86 PetscBool flg = PETSC_FALSE;
87
88 PetscFunctionBeginUser;
89 PetscCall(PetscInitialize(&argc, &args, NULL, help));
90 PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_mpiderivedtype", &flg, NULL));
91 if (flg) {
92 PetscCall(TestMPIDerivedDataType());
93 PetscCall(PetscFinalize());
94 return 0;
95 }
96
97 PetscCall(PetscOptionsGetBool(NULL, NULL, "-mem_view", &flg, NULL));
98 PetscCall(PetscMemoryGetCurrentUsage(&mem));
99 if (flg) {
100 PetscCall(PetscPrintf(MPI_COMM_WORLD, "Before start,"));
101 PetscCall(Print_memory(mem));
102 }
103
104 PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, mA, nz, NULL, nz, NULL, &A));
105 PetscCall(MatSetRandom(A, NULL));
106 PetscCall(PetscMemoryGetCurrentUsage(&mem));
107 if (flg) {
108 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating A,"));
109 PetscCall(Print_memory(mem));
110 }
111
112 PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, nX, NULL, &X));
113 PetscCall(MatSetRandom(X, NULL));
114 PetscCall(PetscMemoryGetCurrentUsage(&mem));
115 if (flg) {
116 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating X,"));
117 PetscCall(Print_memory(mem));
118 }
119
120 PetscCall(MatMatMult(A, X, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &Y));
121 PetscCall(PetscMemoryGetCurrentUsage(&mem));
122 if (flg) {
123 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,"));
124 PetscCall(Print_memory(mem));
125 }
126
127 /* Test reuse */
128 PetscCall(MatMatMult(A, X, MAT_REUSE_MATRIX, PETSC_DETERMINE, &Y));
129 PetscCall(PetscMemoryGetCurrentUsage(&mem));
130 if (flg) {
131 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,"));
132 PetscCall(Print_memory(mem));
133 }
134
135 /* Check accuracy */
136 PetscCall(MatMatMultEqual(A, X, Y, 10, &flg));
137 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMETYPE, "Error in MatMatMult()");
138
139 PetscCall(MatDestroy(&A));
140 PetscCall(MatDestroy(&X));
141 PetscCall(MatDestroy(&Y));
142
143 PetscCall(PetscFinalize());
144 return 0;
145 }
146
147 /*TEST
148
149 test:
150 suffix: 1
151 nsize: 4
152 output_file: output/empty.out
153
154 test:
155 suffix: 2
156 nsize: 8
157 output_file: output/empty.out
158
159 test:
160 suffix: 3
161 nsize: 2
162 args: -test_mpiderivedtype
163 output_file: output/ex33_3.out
164
165 TEST*/
166