aboutsummaryrefslogtreecommitdiff
path: root/networking/wget.c
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2009-06-28 03:33:57 +0200
committerDenys Vlasenko <vda.linux@googlemail.com>2009-06-28 03:33:57 +0200
commitf1fab0924285cca27903a1e4a4498c7205810742 (patch)
tree3b1224f5faeb82e4f9a1ba5d9adf29579a103e23 /networking/wget.c
parentab0c8d7b35fbbe3e038800587cde0e3a99907187 (diff)
downloadbusybox-f1fab0924285cca27903a1e4a4498c7205810742.tar.gz
wget: fix redirection from HTTP to FTP server
while at it, sanitize redirection in general; add printout of every redirection hop; make sure we won't print any non-ASCII garbage from remote server in error meesages. function old new delta sanitize_string - 14 +14 parse_url 294 301 +7 gethdr 190 197 +7 wget_main 2326 2331 +5 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 3/0 up/down: 33/0) Total: 33 bytes Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'networking/wget.c')
-rw-r--r--networking/wget.c277
1 files changed, 140 insertions, 137 deletions
diff --git a/networking/wget.c b/networking/wget.c
index f826d1a8b..d518286a4 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -6,7 +6,6 @@
*
* Licensed under GPLv2, see file LICENSE in this tarball for details.
*/
-
#include "libbb.h"
struct host_info {
@@ -239,6 +238,15 @@ static char *base64enc_512(char buf[512], const char *str)
}
#endif
+static char* sanitize_string(char *s)
+{
+ unsigned char *p = (void *) s;
+ while (*p >= ' ')
+ p++;
+ *p = '\0';
+ return s;
+}
+
static FILE *open_socket(len_and_sockaddr *lsa)
{
FILE *fp;
@@ -294,7 +302,7 @@ static void parse_url(char *src_url, struct host_info *h)
h->host = url + 6;
h->is_ftp = 1;
} else
- bb_error_msg_and_die("not an http or ftp url: %s", url);
+ bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
// FYI:
// "Real" wget 'http://busybox.net?var=a/b' sends this request:
@@ -360,7 +368,7 @@ static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
/* verify we are at the end of the header name */
if (*s != ':')
- bb_error_msg_and_die("bad header line: %s", buf);
+ bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));
/* locate the start of the header value */
*s++ = '\0';
@@ -433,7 +441,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
sfp = open_socket(lsa);
if (ftpcmd(NULL, NULL, sfp, buf) != 220)
- bb_error_msg_and_die("%s", buf+4);
+ bb_error_msg_and_die("%s", sanitize_string(buf+4));
/*
* Splitting username:password pair,
@@ -450,7 +458,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
break;
/* fall through (failed login) */
default:
- bb_error_msg_and_die("ftp login: %s", buf+4);
+ bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
}
ftpcmd("TYPE I", NULL, sfp, buf);
@@ -471,7 +479,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
*/
if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
pasv_error:
- bb_error_msg_and_die("bad response to %s: %s", "PASV", buf);
+ bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
}
// Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
// Server's IP is N1.N2.N3.N4 (we ignore it)
@@ -496,7 +504,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
}
if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
- bb_error_msg_and_die("bad response to %s: %s", "RETR", buf);
+ bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
return sfp;
}
@@ -574,6 +582,7 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
struct host_info server, target;
len_and_sockaddr *lsa;
unsigned opt;
+ int redir_limit;
char *proxy = NULL;
char *dir_prefix = NULL;
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
@@ -696,104 +705,91 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
* We are not sure it exists on remove side */
}
- /* We want to do exactly _one_ DNS lookup, since some
- * sites (i.e. ftp.us.debian.org) use round-robin DNS
- * and we want to connect to only one IP... */
+ redir_limit = 5;
+ resolve_lsa:
lsa = xhost2sockaddr(server.host, server.port);
if (!(opt & WGET_OPT_QUIET)) {
- fprintf(stderr, "Connecting to %s (%s)\n", server.host,
- xmalloc_sockaddr2dotted(&lsa->u.sa));
- /* We leak result of xmalloc_sockaddr2dotted */
+ char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
+ fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
+ free(s);
}
-
- /* G.got_clen = 0; - already is */
- sfp = NULL;
+ establish_session:
if (use_proxy || !target.is_ftp) {
/*
* HTTP session
*/
+ char *str;
int status;
- int try = 5;
-
- do {
- char *str;
-
- G.got_clen = 0;
- G.chunked = 0;
-
- if (!--try)
- bb_error_msg_and_die("too many redirections");
-
- /* Open socket to http server */
- if (sfp) fclose(sfp);
- sfp = open_socket(lsa);
-
- /* Send HTTP request */
- if (use_proxy) {
- fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
- target.is_ftp ? "f" : "ht", target.host,
- target.path);
- } else {
- if (opt & WGET_OPT_POST_DATA)
- fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
- else
- fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
- }
- fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
- target.host, user_agent);
+ /* Open socket to http server */
+ sfp = open_socket(lsa);
+
+ /* Send HTTP request */
+ if (use_proxy) {
+ fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
+ target.is_ftp ? "f" : "ht", target.host,
+ target.path);
+ } else {
+ if (opt & WGET_OPT_POST_DATA)
+ fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
+ else
+ fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
+ }
+
+ fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
+ target.host, user_agent);
#if ENABLE_FEATURE_WGET_AUTHENTICATION
- if (target.user) {
- fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
- base64enc_512(buf, target.user));
- }
- if (use_proxy && server.user) {
- fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
- base64enc_512(buf, server.user));
- }
+ if (target.user) {
+ fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
+ base64enc_512(buf, target.user));
+ }
+ if (use_proxy && server.user) {
+ fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
+ base64enc_512(buf, server.user));
+ }
#endif
- if (beg_range)
- fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
+ if (beg_range)
+ fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
- if (extra_headers)
- fputs(extra_headers, sfp);
-
- if (opt & WGET_OPT_POST_DATA) {
- char *estr = URL_escape(post_data);
- fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
- fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
- (int) strlen(estr), estr);
- /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
- /*fprintf(sfp, "%s\r\n", estr);*/
- free(estr);
- } else
+ if (extra_headers)
+ fputs(extra_headers, sfp);
+
+ if (opt & WGET_OPT_POST_DATA) {
+ char *estr = URL_escape(post_data);
+ fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
+ fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
+ (int) strlen(estr), estr);
+ /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
+ /*fprintf(sfp, "%s\r\n", estr);*/
+ free(estr);
+ } else
#endif
- { /* If "Connection:" is needed, document why */
- fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
- }
+ { /* If "Connection:" is needed, document why */
+ fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
+ }
- /*
- * Retrieve HTTP response line and check for "200" status code.
- */
+ /*
+ * Retrieve HTTP response line and check for "200" status code.
+ */
read_response:
- if (fgets(buf, sizeof(buf), sfp) == NULL)
- bb_error_msg_and_die("no response from server");
-
- str = buf;
- str = skip_non_whitespace(str);
- str = skip_whitespace(str);
- // FIXME: no error check
- // xatou wouldn't work: "200 OK"
- status = atoi(str);
- switch (status) {
- case 0:
- case 100:
- while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
- /* eat all remaining headers */;
- goto read_response;
- case 200:
+ if (fgets(buf, sizeof(buf), sfp) == NULL)
+ bb_error_msg_and_die("no response from server");
+
+ str = buf;
+ str = skip_non_whitespace(str);
+ str = skip_whitespace(str);
+ // FIXME: no error check
+ // xatou wouldn't work: "200 OK"
+ status = atoi(str);
+ switch (status) {
+ case 0:
+ case 100:
+ while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
+ /* eat all remaining headers */;
+ goto read_response;
+ case 200:
/*
Response 204 doesn't say "null file", it says "metadata
has changed but data didn't":
@@ -818,60 +814,66 @@ is always terminated by the first empty line after the header fields."
However, in real world it was observed that some web servers
(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
*/
- case 204:
- break;
- case 300: /* redirection */
- case 301:
- case 302:
- case 303:
+ case 204:
+ break;
+ case 300: /* redirection */
+ case 301:
+ case 302:
+ case 303:
+ break;
+ case 206:
+ if (beg_range)
break;
- case 206:
- if (beg_range)
- break;
- /* fall through */
- default:
- /* Show first line only and kill any ESC tricks */
- buf[strcspn(buf, "\n\r\x1b")] = '\0';
- bb_error_msg_and_die("server returned error: %s", buf);
- }
+ /* fall through */
+ default:
+ bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
+ }
- /*
- * Retrieve HTTP headers.
- */
- while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
- /* gethdr converted "FOO:" string to lowercase */
- smalluint key = index_in_strings(keywords, buf) + 1;
- if (key == KEY_content_length) {
- content_len = BB_STRTOOFF(str, NULL, 10);
- if (errno || content_len < 0) {
- bb_error_msg_and_die("content-length %s is garbage", str);
- }
- G.got_clen = 1;
- continue;
- }
- if (key == KEY_transfer_encoding) {
- if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
- bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
- G.chunked = G.got_clen = 1;
+ /*
+ * Retrieve HTTP headers.
+ */
+ while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
+ /* gethdr converted "FOO:" string to lowercase */
+ smalluint key = index_in_strings(keywords, buf) + 1;
+ if (key == KEY_content_length) {
+ content_len = BB_STRTOOFF(str, NULL, 10);
+ if (errno || content_len < 0) {
+ bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
}
- if (key == KEY_location) {
- if (str[0] == '/')
- /* free(target.allocated); */
- target.path = /* target.allocated = */ xstrdup(str+1);
- else {
- parse_url(str, &target);
- if (use_proxy == 0) {
- server.host = target.host;
- server.port = target.port;
- }
+ G.got_clen = 1;
+ continue;
+ }
+ if (key == KEY_transfer_encoding) {
+ if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
+ bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
+ G.chunked = G.got_clen = 1;
+ }
+ if (key == KEY_location && status >= 300) {
+ if (--redir_limit == 0)
+ bb_error_msg_and_die("too many redirections");
+ fclose(sfp);
+ G.got_clen = 0;
+ G.chunked = 0;
+ if (str[0] == '/')
+ /* free(target.allocated); */
+ target.path = /* target.allocated = */ xstrdup(str+1);
+ /* lsa stays the same: it's on the same server */
+ else {
+ parse_url(str, &target);
+ if (!use_proxy) {
+ server.host = target.host;
+ server.port = target.port;
free(lsa);
- lsa = xhost2sockaddr(server.host, server.port);
- break;
- }
+ goto resolve_lsa;
+ } /* else: lsa stays the same: we use proxy */
}
+ goto establish_session;
}
- } while (status >= 300);
+ }
+// if (status >= 300)
+// bb_error_msg_and_die("bad redirection (no Location: header from server)");
+ /* For HTTP, data is pumped over the same connection */
dfp = sfp;
} else {
@@ -897,10 +899,11 @@ However, in real world it was observed that some web servers
retrieve_file_data(dfp, output_fd);
- if ((use_proxy == 0) && target.is_ftp) {
+ if (dfp != sfp) {
+ /* It's ftp. Close it properly */
fclose(dfp);
if (ftpcmd(NULL, NULL, sfp, buf) != 226)
- bb_error_msg_and_die("ftp error: %s", buf+4);
+ bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
ftpcmd("QUIT", NULL, sfp, buf);
}