wget: support multiple URLs on command line

function                                             old     new   delta
wget_main                                           2190    2310    +120
progress_meter                                       124     140     +16
parse_url                                            288     304     +16
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 152/0)             Total: 152 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2011-02-13 02:33:11 +01:00
parent a84eadf9bf
commit a3661096f2

View File

@ -15,8 +15,7 @@
struct host_info {
// May be used if we ever will want to free() all xstrdup()s...
/* char *allocated; */
char *allocated;
const char *path;
const char *user;
char *host;
@ -34,6 +33,14 @@ struct globals {
const char *curfile; /* Name of current file being transferred */
bb_progress_t pmt;
#endif
char *dir_prefix;
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
char *post_data;
char *extra_headers;
#endif
char *fname_out; /* where to direct output (-O) */
const char *proxy_flag; /* Use proxies if env vars are set */
const char *user_agent; /* "User-Agent" header field */
#if ENABLE_FEATURE_WGET_TIMEOUT
unsigned timeout_seconds;
#endif
@ -87,6 +94,7 @@ static void progress_meter(int flag)
G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
if (flag == PROGRESS_END) {
bb_progress_free(&G.pmt);
bb_putchar_stderr('\n');
G.transferred = 0;
}
@ -242,11 +250,12 @@ static int ftpcmd(const char *s1, const char *s2, FILE *fp)
return result;
}
static void parse_url(char *src_url, struct host_info *h)
static void parse_url(const char *src_url, struct host_info *h)
{
char *url, *p, *sp;
/* h->allocated = */ url = xstrdup(src_url);
free(h->allocated);
h->allocated = url = xstrdup(src_url);
if (strncmp(url, "http://", 7) == 0) {
h->port = bb_lookup_port("http", "tcp", 80);
@ -571,103 +580,36 @@ static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
G.got_clen = 1;
}
G.chunked = 0; /* make progress meter show 100% even for chunked */
/* Draw full bar and free its resources */
G.chunked = 0; /* makes it show 100% even for chunked download */
progress_meter(PROGRESS_END);
}
int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int wget_main(int argc UNUSED_PARAM, char **argv)
static int download_one_url(const char *url)
{
struct host_info server, target;
len_and_sockaddr *lsa;
unsigned opt;
bool use_proxy; /* Use proxies if env vars are set */
int redir_limit;
char *proxy = NULL;
char *dir_prefix = NULL;
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
char *post_data;
char *extra_headers = NULL;
llist_t *headers_llist = NULL;
#endif
int output_fd;
len_and_sockaddr *lsa;
FILE *sfp; /* socket to web/ftp server */
FILE *dfp; /* socket to ftp server (data) */
char *fname_out; /* where to direct output (-O) */
int output_fd = -1;
bool use_proxy; /* Use proxies if env vars are set */
const char *proxy_flag = "on"; /* Use proxies if env vars are set */
const char *user_agent = "Wget";/* "User-Agent" header field */
static const char keywords[] ALIGN1 =
"content-length\0""transfer-encoding\0""chunked\0""location\0";
enum {
KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
};
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
static const char wget_longopts[] ALIGN1 =
/* name, has_arg, val */
"continue\0" No_argument "c"
"spider\0" No_argument "s"
"quiet\0" No_argument "q"
"output-document\0" Required_argument "O"
"directory-prefix\0" Required_argument "P"
"proxy\0" Required_argument "Y"
"user-agent\0" Required_argument "U"
#if ENABLE_FEATURE_WGET_TIMEOUT
"timeout\0" Required_argument "T"
#endif
/* Ignored: */
// "tries\0" Required_argument "t"
/* Ignored (we always use PASV): */
"passive-ftp\0" No_argument "\xff"
"header\0" Required_argument "\xfe"
"post-data\0" Required_argument "\xfd"
/* Ignored (we don't do ssl) */
"no-check-certificate\0" No_argument "\xfc"
;
#endif
INIT_G();
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
applet_long_options = wget_longopts;
#endif
/* server.allocated = target.allocated = NULL; */
opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
&fname_out, &dir_prefix,
&proxy_flag, &user_agent,
IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
NULL /* -t RETRIES */
IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
);
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
if (headers_llist) {
int size = 1;
char *cp;
llist_t *ll = headers_llist;
while (ll) {
size += strlen(ll->data) + 2;
ll = ll->link;
}
extra_headers = cp = xmalloc(size);
while (headers_llist) {
cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
}
}
#endif
/* TODO: compat issue: should handle "wget URL1 URL2..." */
char *proxy = NULL;
char *fname_out_alloc;
struct host_info server;
struct host_info target;
server.allocated = NULL;
target.allocated = NULL;
server.user = NULL;
target.user = NULL;
parse_url(argv[optind], &target);
parse_url(url, &target);
/* Use the proxy if necessary */
use_proxy = (strcmp(proxy_flag, "off") != 0);
use_proxy = (strcmp(G.proxy_flag, "off") != 0);
if (use_proxy) {
proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
if (proxy && proxy[0]) {
server.user = NULL;
parse_url(proxy, &server);
} else {
use_proxy = 0;
@ -676,7 +618,8 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
if (!use_proxy) {
server.port = target.port;
if (ENABLE_FEATURE_IPV6) {
server.host = xstrdup(target.host);
//free(server.allocated); - can't be non-NULL
server.host = server.allocated = xstrdup(target.host);
} else {
server.host = target.host;
}
@ -685,34 +628,31 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
if (ENABLE_FEATURE_IPV6)
strip_ipv6_scope_id(target.host);
/* Guess an output filename, if there was no -O FILE */
if (!(opt & WGET_OPT_OUTNAME)) {
fname_out = bb_get_last_path_component_nostrip(target.path);
/* If there was no -O FILE, guess output filename */
output_fd = -1;
fname_out_alloc = NULL;
if (!G.fname_out) {
G.fname_out = bb_get_last_path_component_nostrip(target.path);
/* handle "wget http://kernel.org//" */
if (fname_out[0] == '/' || !fname_out[0])
fname_out = (char*)"index.html";
if (G.fname_out[0] == '/' || !G.fname_out[0])
G.fname_out = (char*)"index.html";
/* -P DIR is considered only if there was no -O FILE */
if (dir_prefix)
fname_out = concat_path_file(dir_prefix, fname_out);
if (G.dir_prefix)
G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
} else {
if (LONE_DASH(fname_out)) {
if (LONE_DASH(G.fname_out)) {
/* -O - */
output_fd = 1;
opt &= ~WGET_OPT_CONTINUE;
option_mask32 &= ~WGET_OPT_CONTINUE;
}
}
#if ENABLE_FEATURE_WGET_STATUSBAR
G.curfile = bb_get_last_path_component_nostrip(fname_out);
G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
#endif
/* Impossible?
if ((opt & WGET_OPT_CONTINUE) && !fname_out)
bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
*/
/* Determine where to start transfer */
if (opt & WGET_OPT_CONTINUE) {
output_fd = open(fname_out, O_WRONLY);
if (option_mask32 & WGET_OPT_CONTINUE) {
output_fd = open(G.fname_out, O_WRONLY);
if (output_fd >= 0) {
G.beg_range = xlseek(output_fd, 0, SEEK_END);
}
@ -723,12 +663,13 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
redir_limit = 5;
resolve_lsa:
lsa = xhost2sockaddr(server.host, server.port);
if (!(opt & WGET_OPT_QUIET)) {
if (!(option_mask32 & WGET_OPT_QUIET)) {
char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
free(s);
}
establish_session:
G.chunked = G.got_clen = 0;
if (use_proxy || !target.is_ftp) {
/*
* HTTP session
@ -736,6 +677,7 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
char *str;
int status;
/* Open socket to http server */
sfp = open_socket(lsa);
@ -745,14 +687,14 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
target.is_ftp ? "f" : "ht", target.host,
target.path);
} else {
if (opt & WGET_OPT_POST_DATA)
if (option_mask32 & WGET_OPT_POST_DATA)
fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
else
fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
}
fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
target.host, user_agent);
target.host, G.user_agent);
/* Ask server to close the connection as soon as we are done
* (IOW: we do not intend to send more requests)
@ -774,11 +716,11 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
if (extra_headers)
fputs(extra_headers, sfp);
if (G.extra_headers)
fputs(G.extra_headers, sfp);
if (opt & WGET_OPT_POST_DATA) {
char *estr = URL_escape(post_data);
if (option_mask32 & WGET_OPT_POST_DATA) {
char *estr = URL_escape(G.post_data);
fprintf(sfp,
"Content-Type: application/x-www-form-urlencoded\r\n"
"Content-Length: %u\r\n"
@ -810,7 +752,7 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
switch (status) {
case 0:
case 100:
while (gethdr(sfp /*, &n*/) != NULL)
while (gethdr(sfp) != NULL)
/* eat all remaining headers */;
goto read_response;
case 200:
@ -856,9 +798,16 @@ However, in real world it was observed that some web servers
/*
* Retrieve HTTP headers.
*/
while ((str = gethdr(sfp /*, &n*/)) != NULL) {
/* gethdr converted "FOO:" string to lowercase */
while ((str = gethdr(sfp)) != NULL) {
static const char keywords[] ALIGN1 =
"content-length\0""transfer-encoding\0""location\0";
enum {
KEY_content_length = 1, KEY_transfer_encoding, KEY_location
};
smalluint key;
/* gethdr converted "FOO:" string to lowercase */
/* strip trailing whitespace */
char *s = strchrnul(str, '\0') - 1;
while (s >= str && (*s == ' ' || *s == '\t')) {
@ -875,23 +824,22 @@ However, in real world it was observed that some web servers
continue;
}
if (key == KEY_transfer_encoding) {
if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
if (strcmp(str_tolower(str), "chunked") != 0)
bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
G.chunked = G.got_clen = 1;
G.chunked = 1;
}
if (key == KEY_location && status >= 300) {
if (--redir_limit == 0)
bb_error_msg_and_die("too many redirections");
fclose(sfp);
G.got_clen = 0;
G.chunked = 0;
if (str[0] == '/')
/* free(target.allocated); */
target.path = /* target.allocated = */ xstrdup(str+1);
if (str[0] == '/') {
free(target.allocated);
target.path = target.allocated = xstrdup(str+1);
/* lsa stays the same: it's on the same server */
else {
} else {
parse_url(str, &target);
if (!use_proxy) {
free(server.allocated);
server.host = target.host;
/* strip_ipv6_scope_id(target.host); - no! */
/* we assume remote never gives us IPv6 addr with scope id */
@ -916,30 +864,113 @@ However, in real world it was observed that some web servers
sfp = prepare_ftp_session(&dfp, &target, lsa);
}
if (opt & WGET_OPT_SPIDER) {
if (ENABLE_FEATURE_CLEAN_UP)
fclose(sfp);
free(lsa);
free(server.allocated);
free(target.allocated);
if (option_mask32 & WGET_OPT_SPIDER) {
free(fname_out_alloc);
fclose(sfp);
return EXIT_SUCCESS;
}
if (output_fd < 0) {
int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
/* compat with wget: -O FILE can overwrite */
if (opt & WGET_OPT_OUTNAME)
if (option_mask32 & WGET_OPT_OUTNAME)
o_flags = O_WRONLY | O_CREAT | O_TRUNC;
output_fd = xopen(fname_out, o_flags);
output_fd = xopen(G.fname_out, o_flags);
}
free(fname_out_alloc);
retrieve_file_data(dfp, output_fd);
xclose(output_fd);
if (dfp != sfp) {
/* It's ftp. Close it properly */
/* It's ftp. Close data connection properly */
fclose(dfp);
if (ftpcmd(NULL, NULL, sfp) != 226)
bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
/* ftpcmd("QUIT", NULL, sfp); - why bother? */
}
fclose(sfp);
return EXIT_SUCCESS;
}
int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
/*
 * Applet entry point: parse the command line once into G, then download
 * every remaining argument as a URL, in order.
 *
 * Returns the bitwise OR of all download_one_url() results, so the exit
 * code is nonzero if any single download reported a nonzero status.
 */
int wget_main(int argc UNUSED_PARAM, char **argv)
{
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
	static const char wget_longopts[] ALIGN1 =
		/* name, has_arg, val */
		"continue\0"         No_argument       "c"
//FIXME: -s isn't --spider, it's --save-headers!
		"spider\0"           No_argument       "s"
		"quiet\0"            No_argument       "q"
		"output-document\0"  Required_argument "O"
		"directory-prefix\0" Required_argument "P"
		"proxy\0"            Required_argument "Y"
		"user-agent\0"       Required_argument "U"
#if ENABLE_FEATURE_WGET_TIMEOUT
		"timeout\0"          Required_argument "T"
#endif
		/* Ignored: */
		// "tries\0"         Required_argument "t"
		/* Ignored (we always use PASV): */
		"passive-ftp\0"      No_argument       "\xff"
		"header\0"           Required_argument "\xfe"
		"post-data\0"        Required_argument "\xfd"
		/* Ignored (we don't do ssl) */
		"no-check-certificate\0" No_argument   "\xfc"
		;
#endif

	int exitcode;
	char *fname_out_opt; /* G.fname_out as it was right after option parsing */
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
	llist_t *headers_llist = NULL;
#endif

	INIT_G();

	/* Defaults; the matching options below overwrite them */
	IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
	G.proxy_flag = "on";   /* use proxies if env vars are set */
	G.user_agent = "Wget"; /* "User-Agent" header field */

#if ENABLE_FEATURE_WGET_LONG_OPTIONS
	applet_long_options = wget_longopts;
#endif
	opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
	getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
		&G.fname_out, &G.dir_prefix,
		&G.proxy_flag, &G.user_agent,
		IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
		NULL /* -t RETRIES */
		IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
		IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
	);
	argv += optind;

	/* Remember what option parsing left in G.fname_out, so it can be
	 * restored after each download (see the loop below). */
	fname_out_opt = G.fname_out;

#if ENABLE_FEATURE_WGET_LONG_OPTIONS
	if (headers_llist) {
		/* Join all --header values into one "H1\r\nH2\r\n..." string:
		 * each entry needs its length plus 2 for "\r\n", plus 1 for
		 * the terminating NUL. */
		int size = 1;
		char *cp;
		llist_t *ll = headers_llist;
		while (ll) {
			size += strlen(ll->data) + 2;
			ll = ll->link;
		}
		G.extra_headers = cp = xmalloc(size);
		while (headers_llist) {
			cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
		}
	}
#endif

	exitcode = 0;
	while (*argv) {
		exitcode |= download_one_url(*argv++);
		/* download_one_url() treats a non-NULL G.fname_out as "-O FILE
		 * was given". Without -O it stores its guessed name there, and
		 * that name points into memory it frees before returning.
		 * Restore the parsed value so the next URL gets its own name
		 * guessed instead of reusing a stale pointer. */
		G.fname_out = fname_out_opt;
	}

	return exitcode;
}