libdap  Updated for version 3.18.3
HTTPConnect.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 #include "config.h"
28 
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32 
33 #include <sys/stat.h>
34 
35 #ifdef WIN32
36 #include <io.h>
37 #endif
38 
39 #include <string>
40 #include <vector>
41 #include <functional>
42 #include <algorithm>
43 #include <sstream>
44 #include <fstream>
45 #include <iterator>
46 #include <cstdlib>
47 #include <cstring>
48 #include <cerrno>
49 
50 //#define DODS_DEBUG2
51 //#define HTTP_TRACE
52 //#define DODS_DEBUG
53 
54 #undef USE_GETENV
55 
56 
57 #include "debug.h"
58 #include "mime_util.h"
59 #include "media_types.h"
60 #include "GNURegex.h"
61 #include "HTTPCache.h"
62 #include "HTTPConnect.h"
63 #include "RCReader.h"
64 #include "HTTPResponse.h"
65 #include "HTTPCacheResponse.h"
66 
67 using namespace std;
68 
69 namespace libdap {
70 
71 // These global variables are not MT-Safe, but I'm leaving them as is because
72 // they are used only for debugging (set them in a debugger like gdb or ddd).
73 // They are not static because I think that many debuggers cannot access
74 // static variables. 08/07/02 jhrg
75 
// Set this to 1 to turn on libcurl's verbose mode (for debugging).
// HTTPConnect::www_lib_init() tests this flag; when it is non-zero the curl
// version is printed to stderr, CURLOPT_VERBOSE is enabled and curl_debug()
// is installed as the debug callback.
int www_trace = 0;

// Keep the temporary files; useful for debugging.
// NOTE(review): no code in the visible part of this file reads this flag;
// confirm where (or whether) it is still consulted.
int dods_keep_temps = 0;
81 
// Inclusive range of HTTP client error codes that have a message below.
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Messages for the client errors, indexed by (status - CLIENT_ERR_MIN).
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        "Not Found: The data source or server could not be found.\n"
        " Often this means that the OPeNDAP server is missing or needs attention;\n"
        " Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone:.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };

// Inclusive range of HTTP server error codes that have a message below.
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Messages for the server errors, indexed by (status - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };

/** Translate an HTTP status code into a human readable message.

    @param status The HTTP status code.
    @return The matching entry of http_client_errors (400..417) or
    http_server_errors (500..505); a generic "Unknown Error" message for
    every other value. */
static std::string
http_status_to_string(int status)
{
    const char *text =
        "Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.";

    const bool is_client_error = (CLIENT_ERR_MIN <= status) && (status <= CLIENT_ERR_MAX);
    const bool is_server_error = (SERVER_ERR_MIN <= status) && (status <= SERVER_ERR_MAX);

    if (is_client_error)
        text = http_client_errors[status - CLIENT_ERR_MIN];
    else if (is_server_error)
        text = http_server_errors[status - SERVER_ERR_MIN];

    return std::string(text);
}
132 
133 static ObjectType
134 determine_object_type(const string &header_value)
135 {
136  // DAP4 Data: application/vnd.opendap.dap4.data
137  // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml
138 
139  string::size_type plus = header_value.find('+');
140  string base_type;
141  string type_extension = "";
142  if (plus != string::npos) {
143  base_type= header_value.substr(0, plus);
144  type_extension = header_value.substr(plus+1);
145  }
146  else
147  base_type = header_value;
148 
149  if (base_type == DMR_Content_Type
150  || (base_type.find("application/") != string::npos
151  && base_type.find("dap4.dataset-metadata") != string::npos)) {
152  if (type_extension == "xml")
153  return dap4_dmr;
154  else
155  return unknown_type;
156  }
157  else if (base_type == DAP4_DATA_Content_Type
158  || (base_type.find("application/") != string::npos
159  && base_type.find("dap4.data") != string::npos)) {
160  return dap4_data;
161  }
162  else if (header_value.find("text/html") != string::npos) {
163  return web_error;
164  }
165  else
166  return unknown_type;
167 }
168 
173 class ParseHeader : public unary_function<const string &, void>
174 {
175  ObjectType type; // What type of object is in the stream?
176  string server; // Server's version string.
177  string protocol; // Server's protocol version.
178  string location; // Url returned by server
179 
180 public:
181  ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
182  { }
183 
184  void operator()(const string &line)
185  {
186  string name, value;
187  parse_mime_header(line, name, value);
188 
189  DBG2(cerr << name << ": " << value << endl);
190 
191  // Content-Type is used to determine the content of DAP4 responses, but allow the
192  // Content-Description header to override CT o preserve operation with DAP2 servers.
193  // jhrg 11/12/13
194  if (type == unknown_type && name == "content-type") {
195  type = determine_object_type(value); // see above
196  }
197  if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
198  type = get_description_type(value); // defined in mime_util.cc
199  }
200  // The second test (== "dods/0.0") tests if xopendap-server has already
201  // been seen. If so, use that header in preference to the old
202  // XDODS-Server header. jhrg 2/7/06
203  else if (name == "xdods-server" && server == "dods/0.0") {
204  server = value;
205  }
206  else if (name == "xopendap-server") {
207  server = value;
208  }
209  else if (name == "xdap") {
210  protocol = value;
211  }
212  else if (server == "dods/0.0" && name == "server") {
213  server = value;
214  }
215  else if (name == "location") {
216  location = value;
217  }
218  }
219 
220  ObjectType get_object_type()
221  {
222  return type;
223  }
224 
225  string get_server()
226  {
227  return server;
228  }
229 
230  string get_protocol()
231  {
232  return protocol;
233  }
234 
235  string get_location() {
236  return location;
237  }
238 };
239 
255 static size_t
256 save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
257 {
258  DBG2(cerr << "Inside the header parser." << endl);
259  vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);
260 
261  // Grab the header, minus the trailing newline. Or \r\n pair.
262  string complete_line;
263  if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r')
264  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2));
265  else
266  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1));
267 
268  // Store all non-empty headers that are not HTTP status codes
269  if (complete_line != "" && complete_line.find("HTTP") == string::npos) {
270  DBG(cerr << "Header line: " << complete_line << endl);
271  hdrs->push_back(complete_line);
272  }
273 
274  return size * nmemb;
275 }
276 
278 static int
279 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
280 {
281  string message(msg, size);
282 
283  switch (info) {
284  case CURLINFO_TEXT:
285  cerr << "Text: " << message; break;
286  case CURLINFO_HEADER_IN:
287  cerr << "Header in: " << message; break;
288  case CURLINFO_HEADER_OUT:
289  cerr << "Header out: " << message; break;
290  case CURLINFO_DATA_IN:
291  cerr << "Data in: " << message; break;
292  case CURLINFO_DATA_OUT:
293  cerr << "Data out: " << message; break;
294  case CURLINFO_END:
295  cerr << "End: " << message; break;
296 #ifdef CURLINFO_SSL_DATA_IN
297  case CURLINFO_SSL_DATA_IN:
298  cerr << "SSL Data in: " << message; break;
299 #endif
300 #ifdef CURLINFO_SSL_DATA_OUT
301  case CURLINFO_SSL_DATA_OUT:
302  cerr << "SSL Data out: " << message; break;
303 #endif
304  default:
305  cerr << "Curl info: " << message; break;
306  }
307  return 0;
308 }
309 
313 void
314 HTTPConnect::www_lib_init()
315 {
316  d_curl = curl_easy_init();
317  if (!d_curl)
318  throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
319 
320  // Now set options that will remain constant for the duration of this
321  // CURL object.
322 
323  // Set the proxy host.
324  if (!d_rcr->get_proxy_server_host().empty()) {
325  DBG(cerr << "Setting up a proxy server." << endl);
326  DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
327  << endl);
328  DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
329  << endl);
330  DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
331  << endl);
332  curl_easy_setopt(d_curl, CURLOPT_PROXY,
333  d_rcr->get_proxy_server_host().c_str());
334  curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
335  d_rcr->get_proxy_server_port());
336 
337  // As of 4/21/08 only NTLM, Digest and Basic work.
338 #ifdef CURLOPT_PROXYAUTH
339  curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
340 #endif
341 
342  // Password might not be required. 06/21/04 jhrg
343  if (!d_rcr->get_proxy_server_userpw().empty())
344  curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
345  d_rcr->get_proxy_server_userpw().c_str());
346  }
347 
348  curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
349  // We have to set FailOnError to false for any of the non-Basic
350  // authentication schemes to work. 07/28/03 jhrg
351  curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
352 
353  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
354  // choosing the the 'safest' one supported by the server.
355  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
356  curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
357 
358  curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
359  curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
360  curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
361  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
362  // param of save_raw_http_headers to a vector<string> object.
363 
364  // Follow 302 (redirect) responses
365  curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
366  curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
367 
368  // If the user turns off SSL validation...
369  if (d_rcr->get_validate_ssl() == 0) {
370  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
371  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
372  }
373 
374  // Look to see if cookies are turned on in the .dodsrc file. If so,
375  // activate here. We honor 'session cookies' (cookies without an
376  // expiration date) here so that session-based SSO systems will work as
377  // expected.
378  if (!d_cookie_jar.empty()) {
379  DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
380  curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
381  curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
382  }
383 
384  if (www_trace) {
385  cerr << "Curl version: " << curl_version() << endl;
386  curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
387  curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
388  }
389 }
390 
394 class BuildHeaders : public unary_function<const string &, void>
395 {
396  struct curl_slist *d_cl;
397 
398 public:
399  BuildHeaders() : d_cl(0)
400  {}
401 
402  void operator()(const string &header)
403  {
404  DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
405  << endl);
406  d_cl = curl_slist_append(d_cl, header.c_str());
407  }
408 
409  struct curl_slist *get_headers()
410  {
411  return d_cl;
412  }
413 };
414 
429 long
430 HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers)
431 {
432  curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
433 
434 #ifdef WIN32
435  // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
436  // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
437  // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
438  // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
439  // this issue is that one should not pass a FILE * to a windows DLL. Close
440  // inspection of libcurl yields that their default write function when using
441  // the CURLOPT_WRITEDATA is just "fwrite".
442  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
443  curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
444 #else
445  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
446 #endif
447 
448  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
449  ostream_iterator<string>(cerr, "\n")));
450 
451  BuildHeaders req_hdrs;
452  req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
453  req_hdrs);
454  if (headers)
455  req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
456 
457  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
458 
459  // Turn off the proxy for this URL?
460  bool temporary_proxy = false;
461  if ((temporary_proxy = url_uses_no_proxy_for(url))) {
462  DBG(cerr << "Suppress proxy for url: " << url << endl);
463  curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
464  }
465 
466  string::size_type at_sign = url.find('@');
467  // Assume username:password present *and* assume it's an HTTP URL; it *is*
468  // HTTPConnect, after all. 7 is position after "http://"; the second arg
469  // to substr() is the sub string length.
470  if (at_sign != url.npos)
471  d_upstring = url.substr(7, at_sign - 7);
472 
473  if (!d_upstring.empty())
474  curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
475 
476  // Pass save_raw_http_headers() a pointer to the vector<string> where the
477  // response headers may be stored. Callers can use the resp_hdrs
478  // value/result parameter to get the raw response header information .
479  curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
480 
481  // This is the call that causes curl to go and get the remote resource and "write it down"
482  // utilizing the configuration state that has been previously conditioned by various perturbations
483  // of calls to curl_easy_setopt().
484  CURLcode res = curl_easy_perform(d_curl);
485 
486  // Free the header list and null the value in d_curl.
487  curl_slist_free_all(req_hdrs.get_headers());
488  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
489 
490  // Reset the proxy?
491  if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
492  curl_easy_setopt(d_curl, CURLOPT_PROXY,
493  d_rcr->get_proxy_server_host().c_str());
494 
495  if (res != 0)
496  throw Error(d_error_buffer);
497 
498  long status;
499  res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
500  if (res != 0)
501  throw Error(d_error_buffer);
502 
503  char *ct_ptr = 0;
504  res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
505  if (res == CURLE_OK && ct_ptr)
506  d_content_type = ct_ptr;
507  else
508  d_content_type = "";
509 
510  return status;
511 }
512 
516 bool
517 HTTPConnect::url_uses_proxy_for(const string &url)
518 {
519  if (d_rcr->is_proxy_for_used()) {
520  Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
521  int index = 0, matchlen;
522  return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
523  }
524 
525  return false;
526 }
527 
531 bool
532 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
533 {
534  return d_rcr->is_no_proxy_for_used()
535  && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
536 }
537 
538 // Public methods. Mostly...
539 
546 HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""),
547  d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
548 
549 {
550  d_accept_deflate = rcr->get_deflate();
551  d_rcr = rcr;
552 
553  // Load in the default headers to send with a request. The empty Pragma
554  // headers overrides libcurl's default Pragma: no-cache header (which
555  // will disable caching by Squid, et c.). The User-Agent header helps
556  // make server logs more readable. 05/05/03 jhrg
557  d_request_headers.push_back(string("Pragma:"));
558  string user_agent = string("User-Agent: ") + string(CNAME)
559  + string("/") + string(CVER);
560  d_request_headers.push_back(user_agent);
561  if (d_accept_deflate)
562  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
563 
564  // HTTPCache::instance returns a valid ptr or 0.
565  if (d_rcr->get_use_cache())
566  d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
567  else
568  d_http_cache = 0;
569 
570  DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
571  << ")" << endl);
572 
573  if (d_http_cache) {
574  d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
575  d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
576  d_http_cache->set_max_size(d_rcr->get_max_cache_size());
577  d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
578  d_http_cache->set_default_expiration(d_rcr->get_default_expires());
579  d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
580  }
581 
582  d_cookie_jar = rcr->get_cookie_jar();
583 
584  www_lib_init(); // This may throw either Error or InternalErr
585 }
586 
587 HTTPConnect::~HTTPConnect()
588 {
589  DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
590 
591  curl_easy_cleanup(d_curl);
592 
593  DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
594 }
595 
/** Predicate that is true for strings that start with a given header name.

    The header text is stored by value. Keeping only a reference to the
    constructor argument would leave a named instance built from a string
    literal pointing at a temporary that no longer exists.

    The class no longer derives from std::unary_function (removed from the
    standard library in C++17); the typedefs that base provided are declared
    directly. */
class HeaderMatch {
    const std::string d_header;
    public:
        typedef const std::string &argument_type;
        typedef bool result_type;

        /** @param header The text a matching line must start with,
            e.g. "Content-Type:". The comparison is case sensitive. */
        HeaderMatch(const std::string &header) : d_header(header) {}

        /** @param arg A complete header line.
            @return True if arg begins with the header text. */
        bool operator()(const std::string &arg) const
        {
            // Compare only the leading characters; there is no need to scan
            // the whole line for an occurrence further in.
            return arg.compare(0, d_header.size(), d_header) == 0;
        }
};
603 
616 HTTPResponse *
617 HTTPConnect::fetch_url(const string &url)
618 {
619 #ifdef HTTP_TRACE
620  cout << "GET " << url << " HTTP/1.0" << endl;
621 #endif
622 
623  HTTPResponse *stream;
624 
625  if (/*d_http_cache && d_http_cache->*/is_cache_enabled()) {
626  stream = caching_fetch_url(url);
627  }
628  else {
629  stream = plain_fetch_url(url);
630  }
631 
632 #ifdef HTTP_TRACE
633  stringstream ss;
634  ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
635  for (size_t i = 0; i < stream->get_headers()->size(); i++) {
636  ss << stream->get_headers()->at(i) << endl;
637  }
638  cout << ss.str();
639 #endif
640 
641  ParseHeader parser;
642 
643  // An apparent quirk of libcurl is that it does not pass the Content-type
644  // header to the callback used to save them, but check and add it from the
645  // saved state variable only if it's not there (without this a test failed
646  // in HTTPCacheTest). jhrg 11/12/13
647  if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
648  HeaderMatch("Content-Type:")) == stream->get_headers()->end())
649  stream->get_headers()->push_back("Content-Type: " + d_content_type);
650 
651  parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
652 
653 #ifdef HTTP_TRACE
654  cout << endl << endl;
655 #endif
656 
657  // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
658  if (parser.get_location() != "" &&
659  url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
660  delete stream;
661  return fetch_url(parser.get_location());
662  }
663 
664  stream->set_type(parser.get_object_type()); // uses the value of content-description
665 
666  stream->set_version(parser.get_server());
667  stream->set_protocol(parser.get_protocol());
668 
669  if (d_use_cpp_streams) {
670  stream->transform_to_cpp();
671  }
672 
673  return stream;
674 }
675 
// Look around for a reasonable place to put a temporary file. Check first
// the value of the TMPDIR env var. If that does not yield a path that's
// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
// defined in stdio.h). If both come up empty, then use `./'.
680 
681 // Change this to a version that either returns a string or an open file
682 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
683 // (see open()) to make it more secure. Ideal solution: get deserialize()
684 // methods to read from a stream returned by libcurl, not from a temporary
685 // file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
686 static string
687 get_tempfile_template(const string &file_template)
688 {
689  string c;
690 
691  // Windows has one idea of the standard name(s) for a temporary files dir
692 #ifdef WIN32
693  // white list for a WIN32 directory
694  Regex directory("[-a-zA-Z0-9_:\\]*");
695 
696  // If we're OK to use getenv(), try it.
697 #ifdef USE_GETENV
698  c = getenv("TEMP");
699  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
700  goto valid_temp_directory;
701 
702  c= getenv("TMP");
703  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
704  goto valid_temp_directory;
705 #endif // USE_GETENV
706 
707  // The windows default
708  c = "c:\tmp";
709  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
710  goto valid_temp_directory;
711 
712 #else // Unix/Linux/OSX has another...
713  // white list for a directory
714  Regex directory("[-a-zA-Z0-9_/]*");
715 #ifdef USE_GETENV
716  c = getenv("TMPDIR");
717  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
718  goto valid_temp_directory;
719 #endif // USE_GETENV
720 
721  // Unix defines this sometimes - if present, use it.
722 #ifdef P_tmpdir
723  if (access(P_tmpdir, W_OK | R_OK) == 0) {
724  c = P_tmpdir;
725  goto valid_temp_directory;
726  }
727 #endif
728 
729  // The Unix default
730  c = "/tmp";
731  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
732  goto valid_temp_directory;
733 
734 #endif // WIN32
735 
736  // If we found nothing useful, use the current directory
737  c = ".";
738 
739 valid_temp_directory:
740 
741 #ifdef WIN32
742  c += "\\" + file_template;
743 #else
744  c += "/" + file_template;
745 #endif
746 
747  return c;
748 }
749 
768 string
769 get_temp_file(FILE *&stream) throw(Error)
770 {
771  string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
772 
773  vector<char> pathname(dods_temp.length() + 1);
774 
775  strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
776 
777  DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
778 
779  // Open truncated for update. NB: mkstemp() returns a file descriptor.
780 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
781  stream = fopen(_mktemp(&pathname[0]), "w+b");
782 #else
783  // Make sure that temp files are accessible only by the owner.
784  int mask = umask(077);
785  if (mask < 0)
786  throw Error("Could not set the file creation mask: " + string(strerror(errno)));
787  int fd = mkstemp(&pathname[0]);
788  if (fd < 0)
789  throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
790 
791  stream = fdopen(fd, "w+");
792  umask(mask);
793 #endif
794 
795  if (!stream)
796  throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
797 
798  dods_temp = &pathname[0];
799  return dods_temp;
800 }
801 
802 
808 void
809 close_temp(FILE *s, const string &name)
810 {
811  int res = fclose(s);
812  if (res)
813  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
814 
815  res = unlink(name.c_str());
816  if (res != 0)
817  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
818 }
819 
841 HTTPResponse *
842 HTTPConnect::caching_fetch_url(const string &url)
843 {
844  DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
845 
846  vector<string> *headers = new vector<string>;
847  string file_name;
848  FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
849  if (!s) {
850  // url not in cache; get it and cache it
851  DBGN(cerr << "no; getting response and caching." << endl);
852  delete headers; headers = 0;
853  time_t now = time(0);
854  HTTPResponse *rs = plain_fetch_url(url);
855  d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
856 
857  return rs;
858  }
859  else { // url in cache
860  DBGN(cerr << "yes... ");
861 
862  if (d_http_cache->is_url_valid(url)) { // url in cache and valid
863  DBGN(cerr << "and it's valid; using cached response." << endl);
864  HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
865  return crs;
866  }
867  else { // url in cache but not valid; validate
868  DBGN(cerr << "but it's not valid; validating... ");
869 
870  d_http_cache->release_cached_response(s); // This closes 's'
871  headers->clear();
872  vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
873  FILE *body = 0;
874  string dods_temp = get_temp_file(body);
875  time_t now = time(0); // When was the request made (now).
876  long http_status;
877 
878  try {
879  http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
880  rewind(body);
881  }
882  catch (Error &e) {
883  close_temp(body, dods_temp);
884  delete headers;
885  throw ;
886  }
887 
888  switch (http_status) {
889  case 200: { // New headers and new body
890  DBGN(cerr << "read a new response; caching." << endl);
891 
892  d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
893  HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
894 
895  return rs;
896  }
897 
898  case 304: { // Just new headers, use cached body
899  DBGN(cerr << "cached response valid; updating." << endl);
900 
901  close_temp(body, dods_temp);
902  d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
903  string file_name;
904  FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
905  HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
906  return crs;
907  }
908 
909  default: { // Oops.
910  close_temp(body, dods_temp);
911  if (http_status >= 400) {
912  delete headers; headers = 0;
913  string msg = "Error while reading the URL: ";
914  msg += url;
915  msg
916  += ".\nThe OPeNDAP server returned the following message:\n";
917  msg += http_status_to_string(http_status);
918  throw Error(msg);
919  }
920  else {
921  delete headers; headers = 0;
922  throw InternalErr(__FILE__, __LINE__,
923  "Bad response from the HTTP server: " + long_to_string(http_status));
924  }
925  }
926  }
927  }
928  }
929 
930  throw InternalErr(__FILE__, __LINE__, "Should never get here");
931 }
932 
944 HTTPResponse *
945 HTTPConnect::plain_fetch_url(const string &url)
946 {
947  DBG(cerr << "Getting URL: " << url << endl);
948  FILE *stream = 0;
949  string dods_temp = get_temp_file(stream);
950  vector<string> *resp_hdrs = new vector<string>;
951 
952  int status = -1;
953  try {
954  status = read_url(url, stream, resp_hdrs); // Throws Error.
955  if (status >= 400) {
956  // delete resp_hdrs; resp_hdrs = 0;
957  string msg = "Error while reading the URL: ";
958  msg += url;
959  msg += ".\nThe OPeNDAP server returned the following message:\n";
960  msg += http_status_to_string(status);
961  throw Error(msg);
962  }
963  }
964 
965  catch (Error &e) {
966  delete resp_hdrs;
967  close_temp(stream, dods_temp);
968  throw;
969  }
970 
971 #if 0
972  if (d_use_cpp_streams) {
973  fclose(stream);
974  fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
975  return new HTTPResponse(in, status, resp_hdrs, dods_temp);
976  }
977  else {
978 #endif
979  rewind(stream);
980  return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
981 #if 0
982 }
983 #endif
984 }
985 
997 void
999 {
1000  d_accept_deflate = deflate;
1001 
1002  if (d_accept_deflate) {
1003  if (find(d_request_headers.begin(), d_request_headers.end(),
1004  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
1005  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
1006  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1007  ostream_iterator<string>(cerr, "\n")));
1008  }
1009  else {
1010  vector<string>::iterator i;
1011  i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1012  bind2nd(equal_to<string>(),
1013  string("Accept-Encoding: deflate, gzip, compress")));
1014  d_request_headers.erase(i, d_request_headers.end());
1015  }
1016 }
1017 
1026 void
1027 HTTPConnect::set_xdap_protocol(int major, int minor)
1028 {
1029  // Look for, and remove if one exists, an XDAP-Accept header
1030  vector<string>::iterator i;
1031  i = find_if(d_request_headers.begin(), d_request_headers.end(),
1032  HeaderMatch("XDAP-Accept:"));
1033  if (i != d_request_headers.end())
1034  d_request_headers.erase(i);
1035 
1036  // Record and add the new header value
1037  d_dap_client_protocol_major = major;
1038  d_dap_client_protocol_minor = minor;
1039  ostringstream xdap_accept;
1040  xdap_accept << "XDAP-Accept: " << major << "." << minor;
1041 
1042  d_request_headers.push_back(xdap_accept.str());
1043 
1044  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1045  ostream_iterator<string>(cerr, "\n")));
1046 }
1047 
1063 void
1064 HTTPConnect::set_credentials(const string &u, const string &p)
1065 {
1066  if (u.empty())
1067  return;
1068 
1069  // Store the credentials locally.
1070  d_username = u;
1071  d_password = p;
1072 
1073  d_upstring = u + ":" + p;
1074 }
1075 
1076 } // namespace libdap
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:635
void set_credentials(const string &u, const string &p)
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:129
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:724
STL namespace.
ObjectType
The type of object in the stream coming from the data server.
Definition: ObjectType.h:58
HTTPResponse * fetch_url(const string &url)
Definition: HTTPConnect.cc:617
A class for software fault reporting.
Definition: InternalErr.h:64
void parse_mime_header(const string &header, string &name, string &value)
Definition: mime_util.cc:898
int match(const char *s, int len, int pos=0)
Does the pattern match.
Definition: GNURegex.cc:115
ObjectType get_description_type(const string &value)
Definition: mime_util.cc:339
void close_temp(FILE *s, const string &name)
Definition: HTTPConnect.cc:809
string get_temp_file(FILE *&stream)
Definition: HTTPConnect.cc:769
void set_accept_deflate(bool defalte)
Definition: HTTPConnect.cc:998
void set_always_validate(bool validate)
Definition: HTTPCache.cc:841
void set_xdap_protocol(int major, int minor)
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:819
A class for error processing.
Definition: Error.h:90
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:690
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:772