From 6f5f57c4224d1ded876dd0fc981abb3f09f14df3 Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Tue, 2 Oct 2018 21:48:06 -0500 Subject: [PATCH] Support spaces in URLs. This applies to both directly entered URLs and redirects. At least some servers (e.g. bit.ly) will send un-percent-encoded URLs that may contain spaces in the Location header. Spaces are now percent-encoded in the HTTP request. Other characters that should be percent-encoded still aren't, but I think many servers can accept them as-is. --- http.c | 38 ++++++++++++++++++++++++++++++++++++-- seturl.c | 2 +- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/http.c b/http.c index 7754bb5..5db481f 100644 --- a/http.c +++ b/http.c @@ -47,10 +47,35 @@ UpdateRequestRange(Session *sess, unsigned long start, unsigned long end) { Boolean BuildHTTPRequest(Session *sess, char *resourceStr) { - int sizeNeeded = 0; + long sizeNeeded; int rangeOffset; int round = 0; + char *escapedStr = NULL; + if (strchr(resourceStr, ' ') != NULL) { + sizeNeeded = strlen(resourceStr); + if (sizeNeeded > 10000) + return FALSE; + escapedStr = malloc(sizeNeeded*3 + 1); + if (escapedStr == NULL) + return FALSE; + + char *s = escapedStr; + char c; + while ((c = *resourceStr++) != '\0') { + if (c == ' ') { + *s++ = '%'; + *s++ = '2'; + *s++ = '0'; + } else { + *s++ = c; + } + *s = '\0'; + } + resourceStr = escapedStr; + } + + sizeNeeded = 0; do { sizeNeeded = snprintf(sess->httpRequest, sizeNeeded, "GET /%s HTTP/1.1\r\n" @@ -69,6 +94,7 @@ Boolean BuildHTTPRequest(Session *sess, char *resourceStr) { free(sess->httpRequest); sess->httpRequest = NULL; sess->httpRequestRange = NULL; + free(escapedStr); return FALSE; } @@ -78,11 +104,14 @@ Boolean BuildHTTPRequest(Session *sess, char *resourceStr) { sess->httpRequest = malloc(sizeNeeded); if (sess->httpRequest == NULL) { sess->httpRequestRange = NULL; + free(escapedStr); return FALSE; } } } while (round++ == 0); + free(escapedStr); + sess->httpRequestRange = sess->httpRequest + rangeOffset; UpdateRequestRange(sess, sess->desiredStart, sess->desiredEnd); @@ -317,10 +346,14 @@ netRetry: case LOCATION: endPtr = response; char c; - while ((c = *endPtr)!=0 && c!='\r' && c!='\n' && c!=' ' && c!='\t') + while ((c = *endPtr) != '\0' && c != '\r' && c != '\n') endPtr++; if (c == '\0' || c == '\n') goto errorReturn; + while (endPtr > response + && (*(endPtr-1) == ' ' || *(endPtr-1) == '\t')) { + c = *--endPtr; + } if (wantRedirect) { *endPtr = '\0'; if (SetURL(sess, response, FALSE, TRUE) != OPERATION_SUCCESSFUL) { @@ -331,6 +364,7 @@ netRetry: response = endPtr; gotRedirect = TRUE; } + break; /* Unknown headers: ignored */ case UNKNOWN_HEADER: diff --git a/seturl.c b/seturl.c index cdd1352..ef523c5 100644 --- a/seturl.c +++ b/seturl.c @@ -25,7 +25,7 @@ SetURL(Session *sess, char *url, Boolean permissive, Boolean partialOK) { } for(unsigned int i = 0; url[i] != '\0'; i++) { - if ((unsigned char)url[i] <= ' ') { + if ((unsigned char)url[i] < ' ') { return INVALID_CHARACTER_IN_URL; } }