xref: /petsc/src/sys/fileio/fretrieve.c (revision 9371c9d470a9602b6d10a8bf50c9b2280a79e45a)
1 
2 /*
3       Code for opening and closing files.
4 */
5 #include <petscsys.h>
6 #if defined(PETSC_HAVE_PWD_H)
7 #include <pwd.h>
8 #endif
9 #include <ctype.h>
10 #include <sys/stat.h>
11 #if defined(PETSC_HAVE_UNISTD_H)
12 #include <unistd.h>
13 #endif
14 #if defined(PETSC_HAVE_SYS_UTSNAME_H)
15 #include <sys/utsname.h>
16 #endif
17 #include <fcntl.h>
18 #include <time.h>
19 #if defined(PETSC_HAVE_SYS_SYSTEMINFO_H)
20 #include <sys/systeminfo.h>
21 #endif
22 
23 /*
24    Private routine to delete tmp/shared storage
25 
26    This is called by MPI, not by users.
27 
28    Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
29 
30 */
31 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelTmpShared(MPI_Comm comm, PetscMPIInt keyval, void *count_val, void *extra_state) {
32   PetscFunctionBegin;
33   PetscCallMPI(PetscInfo(NULL, "Deleting tmp/shared data in an MPI_Comm %ld\n", (long)comm));
34   PetscCallMPI(PetscFree(count_val));
35   PetscFunctionReturn(MPI_SUCCESS);
36 }
37 
38 /*@C
39    PetscGetTmp - Gets the name of the tmp directory
40 
41    Collective
42 
43    Input Parameters:
44 +  comm - MPI_Communicator that may share /tmp
45 -  len - length of string to hold name
46 
47    Output Parameter:
48 .  dir - directory name
49 
50    Options Database Keys:
51 +    -shared_tmp  - indicates the directory is shared among the MPI ranks
52 .    -not_shared_tmp - indicates the directory is not shared among the MPI ranks
53 -    -tmp tmpdir - name of the directory you wish to use as /tmp
54 
55    Environmental Variables:
56 +     PETSC_SHARED_TMP - indicates the directory is shared among the MPI ranks
57 .     PETSC_NOT_SHARED_TMP - indicates the directory is not shared among the MPI ranks
58 -     PETSC_TMP - name of the directory you wish to use as /tmp
59 
60    Level: developer
61 
62 @*/
63 PetscErrorCode PetscGetTmp(MPI_Comm comm, char dir[], size_t len) {
64   PetscBool flg;
65 
66   PetscFunctionBegin;
67   PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", dir, len, &flg));
68   if (!flg) { PetscCall(PetscStrncpy(dir, "/tmp", len)); }
69   PetscFunctionReturn(0);
70 }
71 
72 /*@C
73    PetscSharedTmp - Determines if all processors in a communicator share a
74          /tmp or have different ones.
75 
76    Collective
77 
78    Input Parameters:
79 .  comm - MPI_Communicator that may share /tmp
80 
81    Output Parameters:
82 .  shared - PETSC_TRUE or PETSC_FALSE
83 
84    Options Database Keys:
85 +    -shared_tmp  - indicates the directory is shared among the MPI ranks
86 .    -not_shared_tmp - indicates the directory is not shared among the MPI ranks
87 -    -tmp tmpdir - name of the directory you wish to use as /tmp
88 
89    Environmental Variables:
90 +     PETSC_SHARED_TMP  - indicates the directory is shared among the MPI ranks
91 .     PETSC_NOT_SHARED_TMP - indicates the directory is not shared among the MPI ranks
92 -     PETSC_TMP - name of the directory you wish to use as /tmp
93 
94    Level: developer
95 
96    Notes:
97    Stores the status as a MPI attribute so it does not have
98     to be redetermined each time.
99 
100       Assumes that all processors in a communicator either
101        1) have a common /tmp or
102        2) each has a separate /tmp
103       eventually we can write a fancier one that determines which processors
104       share a common /tmp.
105 
106    This will be very slow on runs with a large number of processors since
107    it requires O(p*p) file opens.
108 
109    If the environmental variable PETSC_TMP is set it will use this directory
110   as the "/tmp" directory.
111 
112 @*/
113 PetscErrorCode PetscSharedTmp(MPI_Comm comm, PetscBool *shared) {
114   PetscMPIInt        size, rank, *tagvalp, sum, cnt, i;
115   PetscBool          flg, iflg;
116   FILE              *fd;
117   static PetscMPIInt Petsc_Tmp_keyval = MPI_KEYVAL_INVALID;
118   int                err;
119 
120   PetscFunctionBegin;
121   PetscCallMPI(MPI_Comm_size(comm, &size));
122   if (size == 1) {
123     *shared = PETSC_TRUE;
124     PetscFunctionReturn(0);
125   }
126 
127   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_TMP", NULL, 0, &flg));
128   if (flg) {
129     *shared = PETSC_TRUE;
130     PetscFunctionReturn(0);
131   }
132 
133   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_TMP", NULL, 0, &flg));
134   if (flg) {
135     *shared = PETSC_FALSE;
136     PetscFunctionReturn(0);
137   }
138 
139   if (Petsc_Tmp_keyval == MPI_KEYVAL_INVALID) { PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_Tmp_keyval, NULL)); }
140 
141   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Tmp_keyval, (void **)&tagvalp, (int *)&iflg));
142   if (!iflg) {
143     char filename[PETSC_MAX_PATH_LEN], tmpname[PETSC_MAX_PATH_LEN];
144 
145     /* This communicator does not yet have a shared tmp attribute */
146     PetscCall(PetscMalloc1(1, &tagvalp));
147     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Tmp_keyval, tagvalp));
148 
149     PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", tmpname, 238, &iflg));
150     if (!iflg) {
151       PetscCall(PetscStrcpy(filename, "/tmp"));
152     } else {
153       PetscCall(PetscStrcpy(filename, tmpname));
154     }
155 
156     PetscCall(PetscStrcat(filename, "/petsctestshared"));
157     PetscCallMPI(MPI_Comm_rank(comm, &rank));
158 
159     /* each processor creates a /tmp file and all the later ones check */
160     /* this makes sure no subset of processors is shared */
161     *shared = PETSC_FALSE;
162     for (i = 0; i < size - 1; i++) {
163       if (rank == i) {
164         fd = fopen(filename, "w");
165         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
166         err = fclose(fd);
167         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
168       }
169       PetscCallMPI(MPI_Barrier(comm));
170       if (rank >= i) {
171         fd = fopen(filename, "r");
172         if (fd) cnt = 1;
173         else cnt = 0;
174         if (fd) {
175           err = fclose(fd);
176           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
177         }
178       } else cnt = 0;
179 
180       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
181       if (rank == i) unlink(filename);
182 
183       if (sum == size) {
184         *shared = PETSC_TRUE;
185         break;
186       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share /tmp ");
187     }
188     *tagvalp = (int)*shared;
189     PetscCall(PetscInfo(NULL, "processors %s %s\n", (*shared) ? "share" : "do NOT share", (iflg ? tmpname : "/tmp")));
190   } else *shared = (PetscBool)*tagvalp;
191   PetscFunctionReturn(0);
192 }
193 
194 /*@C
195   PetscSharedWorkingDirectory - Determines if all processors in a communicator share a working directory or have different ones.
196 
197   Collective
198 
199   Input Parameter:
200 . comm - MPI_Communicator that may share working directory
201 
202   Output Parameter:
203 . shared - PETSC_TRUE or PETSC_FALSE
204 
205   Options Database Keys:
206 + -shared_working_directory - indicates the directory is shared among the MPI ranks
207 - -not_shared_working_directory - indicates the directory is shared among the MPI ranks
208 
209   Environmental Variables:
210 + PETSC_SHARED_WORKING_DIRECTORY - indicates the directory is shared among the MPI ranks
211 - PETSC_NOT_SHARED_WORKING_DIRECTORY - indicates the directory is shared among the MPI ranks
212 
213   Level: developer
214 
215   Notes:
216   Stores the status as a MPI attribute so it does not have to be redetermined each time.
217 
218   Assumes that all processors in a communicator either
219 $   1) have a common working directory or
220 $   2) each has a separate working directory
221   eventually we can write a fancier one that determines which processors share a common working directory.
222 
223   This will be very slow on runs with a large number of processors since it requires O(p*p) file opens.
224 @*/
225 PetscErrorCode PetscSharedWorkingDirectory(MPI_Comm comm, PetscBool *shared) {
226   PetscMPIInt        size, rank, *tagvalp, sum, cnt, i;
227   PetscBool          flg, iflg;
228   FILE              *fd;
229   static PetscMPIInt Petsc_WD_keyval = MPI_KEYVAL_INVALID;
230   int                err;
231 
232   PetscFunctionBegin;
233   PetscCallMPI(MPI_Comm_size(comm, &size));
234   if (size == 1) {
235     *shared = PETSC_TRUE;
236     PetscFunctionReturn(0);
237   }
238 
239   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
240   if (flg) {
241     *shared = PETSC_TRUE;
242     PetscFunctionReturn(0);
243   }
244 
245   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
246   if (flg) {
247     *shared = PETSC_FALSE;
248     PetscFunctionReturn(0);
249   }
250 
251   if (Petsc_WD_keyval == MPI_KEYVAL_INVALID) { PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_WD_keyval, NULL)); }
252 
253   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_WD_keyval, (void **)&tagvalp, (int *)&iflg));
254   if (!iflg) {
255     char filename[PETSC_MAX_PATH_LEN];
256 
257     /* This communicator does not yet have a shared  attribute */
258     PetscCall(PetscMalloc1(1, &tagvalp));
259     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_WD_keyval, tagvalp));
260 
261     PetscCall(PetscGetWorkingDirectory(filename, 240));
262     PetscCall(PetscStrcat(filename, "/petsctestshared"));
263     PetscCallMPI(MPI_Comm_rank(comm, &rank));
264 
265     /* each processor creates a  file and all the later ones check */
266     /* this makes sure no subset of processors is shared */
267     *shared = PETSC_FALSE;
268     for (i = 0; i < size - 1; i++) {
269       if (rank == i) {
270         fd = fopen(filename, "w");
271         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
272         err = fclose(fd);
273         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
274       }
275       PetscCallMPI(MPI_Barrier(comm));
276       if (rank >= i) {
277         fd = fopen(filename, "r");
278         if (fd) cnt = 1;
279         else cnt = 0;
280         if (fd) {
281           err = fclose(fd);
282           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
283         }
284       } else cnt = 0;
285 
286       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
287       if (rank == i) unlink(filename);
288 
289       if (sum == size) {
290         *shared = PETSC_TRUE;
291         break;
292       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share working directory");
293     }
294     *tagvalp = (int)*shared;
295   } else *shared = (PetscBool)*tagvalp;
296   PetscCall(PetscInfo(NULL, "processors %s working directory\n", (*shared) ? "shared" : "do NOT share"));
297   PetscFunctionReturn(0);
298 }
299 
300 /*@C
301     PetscFileRetrieve - Obtains a file from a URL or compressed
302         and copies into local disk space as uncompressed.
303 
304     Collective
305 
306     Input Parameters:
307 +   comm     - processors accessing the file
308 .   url      - name of file, including entire URL (with or without .gz)
309 -   llen     - length of localname
310 
311     Output Parameters:
312 +   localname - name of local copy of file - valid on only process zero
313 -   found - if found or retrieved the file - valid on all processes
314 
315     Notes:
316     if the file already exists local this function just returns without downloading it.
317 
318     Level: intermediate
319 @*/
320 PetscErrorCode PetscFileRetrieve(MPI_Comm comm, const char url[], char localname[], size_t llen, PetscBool *found) {
321   char        buffer[PETSC_MAX_PATH_LEN], *par, *tlocalname, name[PETSC_MAX_PATH_LEN];
322   FILE       *fp;
323   PetscMPIInt rank;
324   size_t      len = 0;
325   PetscBool   flg1, flg2, flg3, flg4, download, compressed = PETSC_FALSE;
326 
327   PetscFunctionBegin;
328   PetscCallMPI(MPI_Comm_rank(comm, &rank));
329   if (rank == 0) {
330     *found = PETSC_FALSE;
331 
332     PetscCall(PetscStrstr(url, ".gz", &par));
333     if (par) {
334       PetscCall(PetscStrlen(par, &len));
335       if (len == 3) compressed = PETSC_TRUE;
336     }
337 
338     PetscCall(PetscStrncmp(url, "ftp://", 6, &flg1));
339     PetscCall(PetscStrncmp(url, "http://", 7, &flg2));
340     PetscCall(PetscStrncmp(url, "file://", 7, &flg3));
341     PetscCall(PetscStrncmp(url, "https://", 8, &flg4));
342     download = (PetscBool)(flg1 || flg2 || flg3 || flg4);
343 
344     if (!download && !compressed) {
345       PetscCall(PetscStrncpy(localname, url, llen));
346       PetscCall(PetscTestFile(url, 'r', found));
347       if (*found) {
348         PetscCall(PetscInfo(NULL, "Found file %s\n", url));
349       } else {
350         PetscCall(PetscInfo(NULL, "Did not find file %s\n", url));
351       }
352       goto done;
353     }
354 
355     /* look for uncompressed file in requested directory */
356     if (compressed) {
357       PetscCall(PetscStrncpy(localname, url, llen));
358       PetscCall(PetscStrstr(localname, ".gz", &par));
359       *par = 0; /* remove .gz extension */
360       PetscCall(PetscTestFile(localname, 'r', found));
361       if (*found) goto done;
362     }
363 
364     /* look for file in current directory */
365     PetscCall(PetscStrrchr(url, '/', &tlocalname));
366     PetscCall(PetscStrncpy(localname, tlocalname, llen));
367     if (compressed) {
368       PetscCall(PetscStrstr(localname, ".gz", &par));
369       *par = 0; /* remove .gz extension */
370     }
371     PetscCall(PetscTestFile(localname, 'r', found));
372     if (*found) goto done;
373 
374     if (download) {
375       /* local file is not already here so use curl to get it */
376       PetscCall(PetscStrncpy(localname, tlocalname, llen));
377       PetscCall(PetscStrcpy(buffer, "curl --fail --silent --show-error "));
378       PetscCall(PetscStrcat(buffer, url));
379       PetscCall(PetscStrcat(buffer, " > "));
380       PetscCall(PetscStrcat(buffer, localname));
381 #if defined(PETSC_HAVE_POPEN)
382       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
383       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
384 #else
385       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
386 #endif
387       PetscCall(PetscTestFile(localname, 'r', found));
388       if (*found) {
389         FILE *fd;
390         char  buf[1024], *str, *substring;
391 
392         /* check if the file didn't exist so it downloaded an HTML message instead */
393         fd = fopen(localname, "r");
394         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscTestFile() indicates %s exists but fopen() cannot open it", localname);
395         str = fgets(buf, sizeof(buf) - 1, fd);
396         while (str) {
397           PetscCall(PetscStrstr(buf, "<!DOCTYPE html>", &substring));
398           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
399           PetscCall(PetscStrstr(buf, "Not Found", &substring));
400           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
401           str = fgets(buf, sizeof(buf) - 1, fd);
402         }
403         fclose(fd);
404       }
405     } else if (compressed) {
406       PetscCall(PetscTestFile(url, 'r', found));
407       if (!*found) goto done;
408       PetscCall(PetscStrncpy(localname, url, llen));
409     }
410     if (compressed) {
411       PetscCall(PetscStrrchr(localname, '/', &tlocalname));
412       PetscCall(PetscStrncpy(name, tlocalname, PETSC_MAX_PATH_LEN));
413       PetscCall(PetscStrstr(name, ".gz", &par));
414       *par = 0; /* remove .gz extension */
415       /* uncompress file */
416       PetscCall(PetscStrcpy(buffer, "gzip -c -d "));
417       PetscCall(PetscStrcat(buffer, localname));
418       PetscCall(PetscStrcat(buffer, " > "));
419       PetscCall(PetscStrcat(buffer, name));
420 #if defined(PETSC_HAVE_POPEN)
421       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
422       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
423 #else
424       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
425 #endif
426       PetscCall(PetscStrncpy(localname, name, llen));
427       PetscCall(PetscTestFile(localname, 'r', found));
428     }
429   }
430 done:
431   PetscCallMPI(MPI_Bcast(found, 1, MPIU_BOOL, 0, comm));
432   PetscCallMPI(MPI_Bcast(localname, llen, MPI_CHAR, 0, comm));
433   PetscFunctionReturn(0);
434 }
435