1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 fftw_plan p_forward,p_backward; 15 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 16 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 17 executed for the arrays with which the plan was created */ 18 } Mat_FFTW; 19 20 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 21 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatDestroy_FFTW(Mat); 25 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 26 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatMult_SeqFFTW" 30 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 31 { 32 PetscErrorCode ierr; 33 Mat_FFT *fft = (Mat_FFT*)A->data; 34 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 35 PetscScalar *x_array,*y_array; 36 PetscInt ndim=fft->ndim,*dim=fft->dim; 37 38 PetscFunctionBegin; 39 #if !defined(PETSC_USE_COMPLEX) 40 41 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 42 #endif 43 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 44 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 45 if (!fftw->p_forward){ /* create a plan, then excute it */ 46 switch (ndim){ 47 case 1: 48 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 49 break; 50 case 2: 51 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 52 break; 53 case 3: 54 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 55 break; 56 default: 57 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 58 break; 59 } 60 fftw->finarray = x_array; 61 fftw->foutarray = y_array; 62 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 63 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 64 fftw_execute(fftw->p_forward); 65 } else { /* use existing plan */ 66 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 67 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 68 } else { 69 fftw_execute(fftw->p_forward); 70 } 71 } 72 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 73 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 79 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 80 { 81 PetscErrorCode ierr; 82 Mat_FFT *fft = (Mat_FFT*)A->data; 83 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 84 PetscScalar *x_array,*y_array; 85 PetscInt ndim=fft->ndim,*dim=fft->dim; 86 87 PetscFunctionBegin; 88 #if !defined(PETSC_USE_COMPLEX) 89 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 90 #endif 91 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 92 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 93 if (!fftw->p_backward){ /* create a plan, then excute it */ 94 switch (ndim){ 95 case 1: 96 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 97 break; 98 case 2: 99 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 100 break; 101 case 3: 102 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 103 break; 104 default: 105 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 106 break; 107 } 108 fftw->binarray = x_array; 109 fftw->boutarray = y_array; 110 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 111 } else { /* use existing plan */ 112 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 113 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 114 } else { 115 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 116 } 117 } 118 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 119 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatMult_MPIFFTW" 125 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 126 { 127 PetscErrorCode ierr; 128 Mat_FFT *fft = (Mat_FFT*)A->data; 129 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 130 PetscScalar *x_array,*y_array; 131 PetscInt ndim=fft->ndim,*dim=fft->dim; 132 MPI_Comm comm=((PetscObject)A)->comm; 133 // PetscInt ctr; 134 // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 135 // ndim1=(ptrdiff_t) ndim; 136 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 137 138 // for(ctr=0;ctr<ndim;ctr++) 139 // { 140 // pdim[ctr] = dim[ctr]; 141 // } 142 143 PetscFunctionBegin; 144 #if !defined(PETSC_USE_COMPLEX) 145 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 146 #endif 147 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 148 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 149 150 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 151 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 152 if (!fftw->p_forward){ /* create a plan, then excute it */ 153 switch (ndim){ 154 case 1: 155 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 156 break; 157 case 2: 158 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 159 break; 160 case 3: 161 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 162 break; 163 default: 164 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 165 // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 166 break; 167 } 168 fftw->finarray = x_array; 169 fftw->foutarray = y_array; 170 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 171 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 172 fftw_execute(fftw->p_forward); 173 } else { /* use existing plan */ 174 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 175 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 176 } else { 177 fftw_execute(fftw->p_forward); 178 } 179 } 180 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 181 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 187 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 188 { 189 PetscErrorCode ierr; 190 Mat_FFT *fft = (Mat_FFT*)A->data; 191 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 192 PetscScalar *x_array,*y_array; 193 PetscInt ndim=fft->ndim,*dim=fft->dim; 194 MPI_Comm comm=((PetscObject)A)->comm; 195 // PetscInt ctr; 196 // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 197 // ndim1=(ptrdiff_t) ndim; 198 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 199 200 // for(ctr=0;ctr<ndim;ctr++) 201 // { 202 // pdim[ctr] = dim[ctr]; 203 // } 204 205 PetscFunctionBegin; 206 #if !defined(PETSC_USE_COMPLEX) 207 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 208 #endif 209 // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 210 // should pdim be a member of Mat_FFTW? 211 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 212 213 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 214 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 215 if (!fftw->p_backward){ /* create a plan, then excute it */ 216 switch (ndim){ 217 case 1: 218 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 219 break; 220 case 2: 221 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 222 break; 223 case 3: 224 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 225 break; 226 default: 227 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 228 // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 229 break; 230 } 231 fftw->binarray = x_array; 232 fftw->boutarray = y_array; 233 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 234 } else { /* use existing plan */ 235 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 236 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 237 } else { 238 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 239 } 240 } 241 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 242 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatDestroy_FFTW" 248 PetscErrorCode MatDestroy_FFTW(Mat A) 249 { 250 Mat_FFT *fft = (Mat_FFT*)A->data; 251 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 #if !defined(PETSC_USE_COMPLEX) 256 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 257 #endif 258 fftw_destroy_plan(fftw->p_forward); 259 fftw_destroy_plan(fftw->p_backward); 260 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 261 ierr = PetscFree(fft->data);CHKERRQ(ierr); 262 PetscFunctionReturn(0); 263 } 264 265 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 266 #undef __FUNCT__ 267 #define __FUNCT__ "VecDestroy_MPIFFTW" 268 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 269 { 270 PetscErrorCode ierr; 271 PetscScalar *array; 272 273 PetscFunctionBegin; 274 #if !defined(PETSC_USE_COMPLEX) 275 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 276 #endif 277 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 278 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 279 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 280 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatGetVecs_FFTW1D" 286 /* 287 MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 288 parallel layout determined by FFTW-1D 289 290 */ 291 PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 292 { 293 PetscErrorCode ierr; 294 PetscMPIInt size,rank; 295 MPI_Comm comm=((PetscObject)A)->comm; 296 Mat_FFT *fft = (Mat_FFT*)A->data; 297 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 298 PetscInt N=fft->N; 299 PetscInt ndim=fft->ndim,*dim=fft->dim; 300 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 301 ptrdiff_t f_local_n1,f_local_1_end; 302 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 303 ptrdiff_t b_local_n1,b_local_1_end; 304 fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 305 306 PetscFunctionBegin; 307 #if !defined(PETSC_USE_COMPLEX) 308 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 309 #endif 310 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 311 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 312 if (size == 1){ 313 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 314 } 315 else { 316 if (ndim>1){ 317 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 318 else { 319 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 320 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 321 if (fin) { 322 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 323 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 324 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 325 } 326 if (fout) { 327 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 328 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 329 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 330 } 331 if (bin) { 332 data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 333 ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 334 (*bin)->ops->destroy = VecDestroy_MPIFFTW; 335 } 336 if (bout) { 337 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 338 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 339 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 340 } 341 } 342 if (fin){ 343 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 344 } 345 if (fout){ 346 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 347 } 348 if (bin){ 349 ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 350 } 351 if (bout){ 352 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 353 } 354 PetscFunctionReturn(0); 355 } 356 357 358 359 360 361 362 363 } 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatGetVecs_FFTW" 366 /* 367 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 368 parallel layout determined by FFTW 369 370 Collective on Mat 371 372 Input Parameter: 373 . mat - the matrix 374 375 Output Parameter: 376 + fin - (optional) input vector of forward FFTW 377 - fout - (optional) output vector of forward FFTW 378 379 Level: advanced 380 381 .seealso: MatCreateFFTW() 382 */ 383 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 384 { 385 PetscErrorCode ierr; 386 PetscMPIInt size,rank; 387 MPI_Comm comm=((PetscObject)A)->comm; 388 Mat_FFT *fft = (Mat_FFT*)A->data; 389 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 390 PetscInt N=fft->N; 391 392 PetscFunctionBegin; 393 #if !defined(PETSC_USE_COMPLEX) 394 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 395 #endif 396 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 397 PetscValidType(A,1); 398 399 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 400 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 401 if (size == 1){ /* sequential case */ 402 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 403 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 404 } else { /* mpi case */ 405 ptrdiff_t alloc_local,local_n0,local_0_start; 406 ptrdiff_t local_n1,local_1_end; 407 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 408 fftw_complex *data_fin,*data_fout; 409 // PetscInt ctr; 410 // ptrdiff_t ndim1,*pdim; 411 // ndim1=(ptrdiff_t) ndim; 412 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 413 414 // for(ctr=0;ctr<ndim;ctr++) 415 // { 416 // pdim[ctr] = dim[ctr]; 417 // } 418 419 switch (ndim){ 420 case 1: 421 /* Get local size */ 422 /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */ 423 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 424 425 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 426 if (fin) { 427 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 428 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 429 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 430 } 431 if (fout) { 432 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 433 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 434 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 435 } 436 break; 437 case 2: 438 /* Get local size */ 439 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 440 if (fin) { 441 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 442 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 443 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 444 } 445 if (fout) { 446 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 447 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 448 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 449 } 450 break; 451 case 3: 452 /* Get local size */ 453 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 454 // printf("The quantity n is %d",n); 455 if (fin) { 456 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 457 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 458 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 459 } 460 if (fout) { 461 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 462 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 463 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 464 } 465 break; 466 default: 467 /* Get local size */ 468 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 469 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 470 // printf("The value of alloc local is %d",alloc_local); 471 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 472 // for(i=0;i<ndim;i++) 473 // { 474 // pdim[i]=dim[i];printf("%d",pdim[i]); 475 // } 476 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 477 // printf("The quantity n is %d",n); 478 if (fin) { 479 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 480 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 481 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 482 } 483 if (fout) { 484 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 485 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 486 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 487 } 488 break; 489 } 490 } 491 if (fin){ 492 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 493 } 494 if (fout){ 495 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 496 } 497 PetscFunctionReturn(0); 498 } 499 500 EXTERN_C_BEGIN 501 #undef __FUNCT__ 502 #define __FUNCT__ "MatCreate_FFTW" 503 /* 504 MatCreate_FFTW - Creates a matrix object that provides FFT 505 via the external package FFTW 506 507 Options Database Keys: 508 + -mat_fftw_plannerflags - set FFTW planner flags 509 510 Level: intermediate 511 512 */ 513 PetscErrorCode MatCreate_FFTW(Mat A) 514 { 515 PetscErrorCode ierr; 516 MPI_Comm comm=((PetscObject)A)->comm; 517 Mat_FFT *fft=(Mat_FFT*)A->data; 518 Mat_FFTW *fftw; 519 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 520 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 521 PetscBool flg; 522 PetscInt p_flag,partial_dim=1,ctr; 523 PetscMPIInt size,rank; 524 ptrdiff_t *pdim; 525 526 PetscFunctionBegin; 527 #if !defined(PETSC_USE_COMPLEX) 528 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 529 #endif 530 531 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 532 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 533 534 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 535 pdim[0] = dim[0]; 536 for(ctr=1;ctr<ndim;ctr++) 537 { 538 partial_dim*= dim[ctr]; 539 pdim[ctr] = dim[ctr]; 540 } 541 // printf("partial dimension is %d",partial_dim); 542 if (size == 1) { 543 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 544 n = N; 545 } else { 546 ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 547 switch (ndim){ 548 case 1: 549 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 550 n = (PetscInt)local_n0; 551 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 552 // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs_FFTW1D_C","MatGetVecs_FFTW1D",MatGetVecs_FFTW1D); 553 break; 554 case 2: 555 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 556 /* 557 PetscMPIInt rank; 558 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 559 PetscSynchronizedFlush(comm); 560 */ 561 n = (PetscInt)local_n0*dim[1]; 562 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 563 break; 564 case 3: 565 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 566 // printf("The value of alloc local is %d",alloc_local); 567 n = (PetscInt)local_n0*dim[1]*dim[2]; 568 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 569 break; 570 default: 571 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 572 // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 573 // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 574 n = (PetscInt)local_n0*partial_dim; 575 // printf("New partial dimension is %d %d %d",n,N,ndim); 576 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 577 break; 578 } 579 } 580 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 581 582 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 583 fft->data = (void*)fftw; 584 585 fft->n = n; 586 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 587 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 588 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 589 590 fftw->p_forward = 0; 591 fftw->p_backward = 0; 592 fftw->p_flag = FFTW_ESTIMATE; 593 594 if (size == 1){ 595 A->ops->mult = MatMult_SeqFFTW; 596 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 597 } else { 598 A->ops->mult = MatMult_MPIFFTW; 599 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 600 } 601 fft->matdestroy = MatDestroy_FFTW; 602 A->ops->getvecs = MatGetVecs_FFTW; 603 A->assembled = PETSC_TRUE; 604 605 /* get runtime options */ 606 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 607 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 608 if (flg) {fftw->p_flag = (unsigned)p_flag;} 609 PetscOptionsEnd(); 610 PetscFunctionReturn(0); 611 } 612 EXTERN_C_END 613