1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 fftw_plan p_forward,p_backward; 15 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 16 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 17 executed for the arrays with which the plan was created */ 18 } Mat_FFTW; 19 20 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 21 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatDestroy_FFTW(Mat); 25 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 26 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatMult_SeqFFTW" 30 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 31 { 32 PetscErrorCode ierr; 33 Mat_FFT *fft = (Mat_FFT*)A->data; 34 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 35 PetscScalar *x_array,*y_array; 36 PetscInt ndim=fft->ndim,*dim=fft->dim; 37 38 PetscFunctionBegin; 39 #if !defined(PETSC_USE_COMPLEX) 40 41 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 42 #endif 43 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 44 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 45 if (!fftw->p_forward){ /* create a plan, then excute it */ 46 switch (ndim){ 47 case 1: 48 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 49 break; 50 case 2: 51 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 52 break; 53 case 3: 54 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 55 break; 56 default: 57 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 58 break; 59 } 60 fftw->finarray = x_array; 61 fftw->foutarray = y_array; 62 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 63 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 64 fftw_execute(fftw->p_forward); 65 } else { /* use existing plan */ 66 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 67 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 68 } else { 69 fftw_execute(fftw->p_forward); 70 } 71 } 72 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 73 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 79 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 80 { 81 PetscErrorCode ierr; 82 Mat_FFT *fft = (Mat_FFT*)A->data; 83 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 84 PetscScalar *x_array,*y_array; 85 PetscInt ndim=fft->ndim,*dim=fft->dim; 86 87 PetscFunctionBegin; 88 #if !defined(PETSC_USE_COMPLEX) 89 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 90 #endif 91 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 92 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 93 if (!fftw->p_backward){ /* create a plan, then excute it */ 94 switch (ndim){ 95 case 1: 96 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 97 break; 98 case 2: 99 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 100 break; 101 case 3: 102 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 103 break; 104 default: 105 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 106 break; 107 } 108 fftw->binarray = x_array; 109 fftw->boutarray = y_array; 110 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 111 } else { /* use existing plan */ 112 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 113 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 114 } else { 115 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 116 } 117 } 118 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 119 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatMult_MPIFFTW" 125 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 126 { 127 PetscErrorCode ierr; 128 Mat_FFT *fft = (Mat_FFT*)A->data; 129 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 130 PetscScalar *x_array,*y_array; 131 PetscInt ndim=fft->ndim,*dim=fft->dim; 132 MPI_Comm comm=((PetscObject)A)->comm; 133 // PetscInt ctr; 134 // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 135 // ndim1=(ptrdiff_t) ndim; 136 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 137 138 // for(ctr=0;ctr<ndim;ctr++) 139 // { 140 // pdim[ctr] = dim[ctr]; 141 // } 142 143 PetscFunctionBegin; 144 #if !defined(PETSC_USE_COMPLEX) 145 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 146 #endif 147 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 148 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 149 150 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 151 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 152 if (!fftw->p_forward){ /* create a plan, then excute it */ 153 switch (ndim){ 154 case 1: 155 #if defined(PETSC_USE_COMPLEX) 156 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 157 #endif 158 break; 159 case 2: 160 #if defined(PETSC_USE_COMPLEX) 161 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 162 #else 163 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 164 #endif 165 break; 166 case 3: 167 #if defined(PETSC_USE_COMPLEX) 168 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 169 #else 170 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[3],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 171 #endif 172 break; 173 default: 174 #if defined(PETSC_USE_COMPLEX) 175 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 176 #else 177 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 178 #endif 179 // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 180 break; 181 } 182 fftw->finarray = x_array; 183 fftw->foutarray = y_array; 184 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 185 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 186 fftw_execute(fftw->p_forward); 187 } else { /* use existing plan */ 188 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 189 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 190 } else { 191 fftw_execute(fftw->p_forward); 192 } 193 } 194 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 195 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 196 PetscFunctionReturn(0); 197 } 198 199 #undef __FUNCT__ 200 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 201 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 202 { 203 PetscErrorCode ierr; 204 Mat_FFT *fft = (Mat_FFT*)A->data; 205 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 206 PetscScalar *x_array,*y_array; 207 PetscInt ndim=fft->ndim,*dim=fft->dim; 208 MPI_Comm comm=((PetscObject)A)->comm; 209 // PetscInt ctr; 210 // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 211 // ndim1=(ptrdiff_t) ndim; 212 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 213 214 // for(ctr=0;ctr<ndim;ctr++) 215 // { 216 // pdim[ctr] = dim[ctr]; 217 // } 218 219 PetscFunctionBegin; 220 //#if !defined(PETSC_USE_COMPLEX) 221 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 222 //#endif 223 // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 224 // should pdim be a member of Mat_FFTW? 225 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 226 227 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 228 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 229 if (!fftw->p_backward){ /* create a plan, then excute it */ 230 switch (ndim){ 231 case 1: 232 #if defined(PETSC_USE_COMPLEX) 233 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 234 #endif 235 break; 236 case 2: 237 #if defined(PETSC_USE_COMPLEX) 238 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 239 #else 240 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 241 #endif 242 break; 243 case 3: 244 #if defined(PETSC_USE_COMPLEX) 245 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 246 #else 247 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 248 #endif 249 break; 250 default: 251 #if defined(PETSC_USE_COMPLEX) 252 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 253 #else 254 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 255 #endif 256 // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 257 break; 258 } 259 fftw->binarray = x_array; 260 fftw->boutarray = y_array; 261 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 262 } else { /* use existing plan */ 263 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 264 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 265 } else { 266 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 267 } 268 } 269 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 270 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDestroy_FFTW" 276 PetscErrorCode MatDestroy_FFTW(Mat A) 277 { 278 Mat_FFT *fft = (Mat_FFT*)A->data; 279 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 #if !defined(PETSC_USE_COMPLEX) 284 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 285 #endif 286 fftw_destroy_plan(fftw->p_forward); 287 fftw_destroy_plan(fftw->p_backward); 288 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 289 ierr = PetscFree(fft->data);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 294 #undef __FUNCT__ 295 #define __FUNCT__ "VecDestroy_MPIFFTW" 296 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 297 { 298 PetscErrorCode ierr; 299 PetscScalar *array; 300 301 PetscFunctionBegin; 302 #if !defined(PETSC_USE_COMPLEX) 303 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 304 #endif 305 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 306 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 307 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 308 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatGetVecs1DC_FFTW" 314 /* 315 MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 316 parallel layout determined by FFTW-1D 317 318 */ 319 PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 320 { 321 PetscErrorCode ierr; 322 PetscMPIInt size,rank; 323 MPI_Comm comm=((PetscObject)A)->comm; 324 Mat_FFT *fft = (Mat_FFT*)A->data; 325 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 326 PetscInt N=fft->N; 327 PetscInt ndim=fft->ndim,*dim=fft->dim; 328 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 329 ptrdiff_t f_local_n1,f_local_1_end; 330 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 331 ptrdiff_t b_local_n1,b_local_1_end; 332 fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 333 334 PetscFunctionBegin; 335 #if !defined(PETSC_USE_COMPLEX) 336 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 337 #endif 338 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 339 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 340 if (size == 1){ 341 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 342 } 343 else { 344 if (ndim>1){ 345 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 346 else { 347 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 348 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 349 if (fin) { 350 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 351 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 352 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 353 } 354 if (fout) { 355 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 356 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 357 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 358 } 359 if (bin) { 360 data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 361 ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 362 (*bin)->ops->destroy = VecDestroy_MPIFFTW; 363 } 364 if (bout) { 365 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 366 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 367 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 368 } 369 } 370 if (fin){ 371 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 372 } 373 if (fout){ 374 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 375 } 376 if (bin){ 377 ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 378 } 379 if (bout){ 380 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 381 } 382 PetscFunctionReturn(0); 383 } 384 385 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatGetVecs_FFTW" 390 /* 391 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 392 parallel layout determined by FFTW 393 394 Collective on Mat 395 396 Input Parameter: 397 . mat - the matrix 398 399 Output Parameter: 400 + fin - (optional) input vector of forward FFTW 401 - fout - (optional) output vector of forward FFTW 402 403 Level: advanced 404 405 .seealso: MatCreateFFTW() 406 */ 407 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 408 { 409 PetscErrorCode ierr; 410 PetscMPIInt size,rank; 411 MPI_Comm comm=((PetscObject)A)->comm; 412 Mat_FFT *fft = (Mat_FFT*)A->data; 413 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 414 PetscInt N=fft->N, N1, n1,vsize; 415 416 PetscFunctionBegin; 417 //#if !defined(PETSC_USE_COMPLEX) 418 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 419 //#endif 420 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 421 PetscValidType(A,1); 422 423 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 424 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 425 if (size == 1){ /* sequential case */ 426 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 427 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 428 printf("The code successfully comes at the end of the routine with one processor\n"); 429 } else { /* mpi case */ 430 ptrdiff_t alloc_local,local_n0,local_0_start; 431 ptrdiff_t local_n1,local_1_end; 432 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 433 fftw_complex *data_fin,*data_fout; 434 double *data_finr, *data_foutr; 435 ptrdiff_t local_1_start; 436 // PetscInt ctr; 437 // ptrdiff_t ndim1,*pdim; 438 // ndim1=(ptrdiff_t) ndim; 439 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 440 441 // for(ctr=0;ctr<ndim;ctr++) 442 // {k 443 // pdim[ctr] = dim[ctr]; 444 // } 445 446 447 448 switch (ndim){ 449 case 1: 450 /* Get local size */ 451 /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */ 452 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 453 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 454 if (fin) { 455 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 456 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 457 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 458 } 459 if (fout) { 460 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 461 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 462 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 463 } 464 break; 465 case 2: 466 #if !defined(PETSC_USE_COMPLEX) 467 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 468 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 469 if (fin) { 470 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 471 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 472 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 473 //printf("The code comes here with vector size %d\n",vsize); 474 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 475 } 476 if (fout) { 477 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 478 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 479 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 480 } 481 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 482 483 #else 484 /* Get local size */ 485 printf("Hope this does not come here"); 486 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 487 if (fin) { 488 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 489 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 490 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 491 } 492 if (fout) { 493 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 494 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 495 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 496 } 497 printf("Hope this does not come here"); 498 #endif 499 break; 500 case 3: 501 /* Get local size */ 502 #if !defined(PETSC_USE_COMPLEX) 503 SETERRQ(comm,PETSC_ERR_SUP,"Not done yet"); 504 #else 505 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 506 // printf("The quantity n is %d",n); 507 if (fin) { 508 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 509 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 510 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 511 } 512 if (fout) { 513 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 514 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 515 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 516 } 517 #endif 518 break; 519 default: 520 /* Get local size */ 521 #if !defined(PETSC_USE_COMPLEX) 522 SETERRQ(comm,PETSC_ERR_SUP,"Not done yet"); 523 #else 524 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 525 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 526 // printf("The value of alloc local is %d",alloc_local); 527 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 528 // for(i=0;i<ndim;i++) 529 // { 530 // pdim[i]=dim[i];printf("%d",pdim[i]); 531 // } 532 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 533 // printf("The quantity n is %d",n); 534 if (fin) { 535 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 536 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 537 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 538 } 539 if (fout) { 540 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 541 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 542 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 543 } 544 #endif 545 break; 546 } 547 } 548 // if (fin){ 549 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 550 // } 551 // if (fout){ 552 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 553 // } 554 PetscFunctionReturn(0); 555 } 556 557 //EXTERN_C_BEGIN - Do we need this? 558 #undef __FUNCT__ 559 #define __FUNCT__ "InputTransformFFT" 560 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 561 { 562 PetscErrorCode ierr; 563 PetscFunctionBegin; 564 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 565 PetscFunctionReturn(0); 566 } 567 //EXTERN_C_END - Do we need this? 568 /* 569 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 570 Input A, x, y 571 A - FFTW matrix 572 x - user data 573 Options Database Keys: 574 + -mat_fftw_plannerflags - set FFTW planner flags 575 576 Level: intermediate 577 578 */ 579 580 EXTERN_C_BEGIN 581 #undef __FUNCT__ 582 #define __FUNCT__ "InputTransformFFT_FTTW" 583 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 584 { 585 PetscErrorCode ierr; 586 MPI_Comm comm=((PetscObject)A)->comm; 587 Mat_FFT *fft = (Mat_FFT*)A->data; 588 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 589 PetscInt N=fft->N, N1, n1 ,NM; 590 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 591 PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 592 PetscInt i,j,rank,size; 593 ptrdiff_t alloc_local,local_n0,local_0_start; 594 ptrdiff_t local_n1,local_1_start; 595 VecScatter vecscat; 596 IS list1,list2; 597 598 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 599 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 600 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 601 printf("Local ownership starts at %d\n",low); 602 603 switch (ndim){ 604 case 1: 605 SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW"); 606 break; 607 case 2: 608 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 609 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 610 611 ierr = PetscMalloc(sizeof(PetscInt)*((int)local_n0)*N1,&indx1);CHKERRQ(ierr); 612 ierr = PetscMalloc(sizeof(PetscInt)*((int)local_n0)*N1,&indx2);CHKERRQ(ierr); 613 printf("Val local_0_start = %d",local_0_start); 614 615 if (dim[1]%2==0) 616 NM = dim[1]+2; 617 else 618 NM = dim[1]+1; 619 620 for (i=0;i<local_n0;i++){ 621 for (j=0;j<dim[1];j++){ 622 tempindx = i*dim[1] + j; 623 tempindx1 = i*NM + j; 624 indx1[tempindx]=local_0_start*N1+tempindx; 625 indx2[tempindx]=low+tempindx1; 626 printf("Val tempindx1 = %d",tempindx1); 627 printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 628 printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 629 } 630 } 631 632 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 633 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 634 635 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 636 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 637 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 638 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 639 break; 640 641 case 3: 642 SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 643 break; 644 645 default: 646 SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 647 break; 648 } 649 650 return 0; 651 } 652 EXTERN_C_END 653 654 /* 655 //EXTERN_C_BEGIN - Do we need this? 656 #undef __FUNCT__ 657 #define __FUNCT__ "OutputTransformFFT" 658 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 659 { 660 PetscErrorCode ierr; 661 PetscFunctionBegin; 662 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 //EXTERN_C_END - Do we need this? 666 */ 667 668 EXTERN_C_BEGIN 669 #undef __FUNCT__ 670 #define __FUNCT__ "MatCreate_FFTW" 671 /* 672 MatCreate_FFTW - Creates a matrix object that provides FFT 673 via the external package FFTW 674 Options Database Keys: 675 + -mat_fftw_plannerflags - set FFTW planner flags 676 677 Level: intermediate 678 679 */ 680 681 PetscErrorCode MatCreate_FFTW(Mat A) 682 { 683 PetscErrorCode ierr; 684 MPI_Comm comm=((PetscObject)A)->comm; 685 Mat_FFT *fft=(Mat_FFT*)A->data; 686 Mat_FFTW *fftw; 687 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 688 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 689 PetscBool flg; 690 PetscInt p_flag,partial_dim=1,ctr; 691 PetscMPIInt size,rank; 692 ptrdiff_t *pdim; 693 694 PetscFunctionBegin; 695 //#if !defined(PETSC_USE_COMPLEX) 696 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 697 //#endif 698 699 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 700 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 701 printf("The code is coming here\n"); 702 ierr = MPI_Barrier(PETSC_COMM_WORLD); 703 704 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 705 pdim[0] = dim[0]; 706 for(ctr=1;ctr<ndim;ctr++) 707 { 708 partial_dim *= dim[ctr]; 709 pdim[ctr] = dim[ctr]; 710 } 711 //#if !defined(PETSC_USE_COMPLEX) 712 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 713 //#endif 714 715 // printf("partial dimension is %d",partial_dim); 716 if (size == 1) { 717 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 718 n = N; 719 } else { 720 ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 721 switch (ndim){ 722 case 1: 723 #if !defined(PETSC_USE_COMPLEX) 724 printf("The code is coming here\n"); 725 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 726 #endif 727 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 728 n = (PetscInt)local_n0; 729 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 730 // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 731 break; 732 case 2: 733 printf("The code is coming here\n"); 734 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 735 /* 736 PetscMPIInt rank; 737 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 738 PetscSynchronizedFlush(comm); 739 */ 740 n = (PetscInt)local_n0*dim[1]; 741 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 742 break; 743 case 3: 744 // printf("The value of alloc local is %d",alloc_local); 745 n = (PetscInt)local_n0*dim[1]*dim[2]; 746 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 747 break; 748 default: 749 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 750 // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 751 // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 752 n = (PetscInt)local_n0*partial_dim; 753 // printf("New partial dimension is %d %d %d",n,N,ndim); 754 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 755 break; 756 } 757 } 758 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 759 760 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 761 fft->data = (void*)fftw; 762 763 fft->n = n; 764 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 765 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 766 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 767 768 fftw->p_forward = 0; 769 fftw->p_backward = 0; 770 fftw->p_flag = FFTW_ESTIMATE; 771 772 if (size == 1){ 773 A->ops->mult = MatMult_SeqFFTW; 774 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 775 } else { 776 A->ops->mult = MatMult_MPIFFTW; 777 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 778 } 779 fft->matdestroy = MatDestroy_FFTW; 780 A->ops->getvecs = MatGetVecs_FFTW; 781 A->assembled = PETSC_TRUE; 782 printf("The code is coming here\n"); 783 #if !defined(PETSC_USE_COMPLEX) 784 printf("The code is coming here\n"); 785 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 786 // PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 787 #endif 788 789 /* get runtime options */ 790 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 791 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 792 if (flg) {fftw->p_flag = (unsigned)p_flag;} 793 PetscOptionsEnd(); 794 PetscFunctionReturn(0); 795 } 796 EXTERN_C_END 797 798 799 800 801