From c237bd661ff3dfe21c5a1b85059b267bd53a2dd3 Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Fri, 12 Apr 2019 23:23:39 -0500 Subject: [PATCH] Add utility code for HTTP connections, URLs, and networking. This is adapted from NetDisk with minor changes. --- Makefile | 2 +- hostname.c | 40 ++++++ hostname.h | 9 ++ http.c | 358 ++++++++++++++++++++++++++++++++++++++++++++++++ http.h | 11 ++ readtcp.c | 48 +++++++ readtcp.h | 16 +++ session.h | 47 +++++++ seturl.c | 99 +++++++++++++ seturl.h | 9 ++ strcasecmp.c | 31 +++++ strcasecmp.h | 9 ++ tcpconnection.c | 60 ++++++++ tcpconnection.h | 10 ++ urlparser.c | 125 +++++++++++++++++ urlparser.h | 25 ++++ version.h | 1 + 17 files changed, 899 insertions(+), 1 deletion(-) create mode 100644 hostname.c create mode 100644 hostname.h create mode 100644 http.c create mode 100644 http.h create mode 100644 readtcp.c create mode 100644 readtcp.h create mode 100644 session.h create mode 100644 seturl.c create mode 100644 seturl.h create mode 100644 strcasecmp.c create mode 100644 strcasecmp.h create mode 100644 tcpconnection.c create mode 100644 tcpconnection.h create mode 100644 urlparser.c create mode 100644 urlparser.h create mode 100644 version.h diff --git a/Makefile b/Makefile index f9cc335..46a5675 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ CFLAGS = -w-1 -O-1 TEST_OBJS = test.a json.a jsonutil.a TEST_PROG = test -DISKBROWSER_OBJS = diskbrowser.a +DISKBROWSER_OBJS = diskbrowser.a hostname.a http.a json.a jsonutil.a readtcp.a seturl.a strcasecmp.a tcpconnection.a urlparser.a DISKBROWSER_RSRC = diskbrowser.rez DISKBROWSER_PROG = DiskBrowser diff --git a/hostname.c b/hostname.c new file mode 100644 index 0000000..6ebfd88 --- /dev/null +++ b/hostname.c @@ -0,0 +1,40 @@ +#pragma noroot + +#include +#include +#include +#include "hostname.h" + +#define DNR_WAIT_TIME 15 /*seconds*/ + +Boolean DoLookupName(Session *sess) { + dnrBuffer dnrInfo; + + if (TCPIPValidateIPString(sess->hostName)) { + cvtRec cvtInfo; + 
TCPIPConvertIPToHex(&cvtInfo, sess->hostName); + sess->ipAddr = cvtInfo.cvtIPAddress; + return TRUE; + } + + TCPIPDNRNameToIP(sess->hostName, &dnrInfo); + if (toolerror()) + return FALSE; + + sess->dnsTime = GetTick(); + while (dnrInfo.DNRstatus == DNR_Pending) { + if (GetTick() - sess->dnsTime >= DNR_WAIT_TIME * 60) + break; + TCPIPPoll(); + } + + if (dnrInfo.DNRstatus == DNR_OK) { + sess->ipAddr = dnrInfo.DNRIPaddress; + return TRUE; + } else { + if (dnrInfo.DNRstatus == DNR_Pending) { + TCPIPCancelDNR(&dnrInfo); + } + return FALSE; + } +} diff --git a/hostname.h b/hostname.h new file mode 100644 index 0000000..f91bdff --- /dev/null +++ b/hostname.h @@ -0,0 +1,9 @@ +#ifndef HOSTNAME_H +#define HOSTNAME_H + +#include +#include "session.h" + +Boolean DoLookupName(Session *sess); + +#endif diff --git a/http.c b/http.c new file mode 100644 index 0000000..33c108a --- /dev/null +++ b/http.c @@ -0,0 +1,358 @@ +#pragma lint -1 +#pragma noroot + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "session.h" +#include "http.h" +#include "tcpconnection.h" +#include "strcasecmp.h" +#include "seturl.h" +#include "version.h" + +#define buffTypePointer 0x0000 /* For TCPIPReadTCP() */ +#define buffTypeHandle 0x0001 +#define buffTypeNewHandle 0x0002 + +#define HTTP_RESPONSE_TIMEOUT 15 /* seconds to wait for response headers */ + +#define MAX_REDIRECTS 5 + +enum ResponseHeader { + UNKNOWN_HEADER = 0, + CONTENT_RANGE, + CONTENT_LENGTH, + TRANSFER_ENCODING, + CONTENT_ENCODING, + LOCATION, +}; + +Boolean BuildHTTPRequest(Session *sess, char *resourceStr) { + long sizeNeeded; + int round = 0; + + char *escapedStr = NULL; + if (strchr(resourceStr, ' ') != NULL) { + sizeNeeded = strlen(resourceStr); + if (sizeNeeded > 10000) + return FALSE; + escapedStr = malloc(sizeNeeded*3 + 1); + if (escapedStr == NULL) + return FALSE; + + char *s = escapedStr; + char c; + while ((c = *resourceStr++) != '\0') { + if (c == ' ') { + *s++ = '%'; 
+ *s++ = '2'; + *s++ = '0'; + } else { + *s++ = c; + } + *s = '\0'; + } + resourceStr = escapedStr; + } + + sizeNeeded = 0; + do { + sizeNeeded = snprintf(sess->httpRequest, sizeNeeded, + "GET /%s HTTP/1.1\r\n" + "Host: %s\r\n" + "User-Agent: GS-Disk-Browser/" USER_AGENT_VERSION "\r\n" + "Accept-Encoding: identity\r\n" + //"Accept: */*\r\n" /* default, but some clients send explicitly */ + //"Connection: Keep-Alive\r\n" /* same */ + "\r\n", + resourceStr, + sess->hostName+1); + + if (sizeNeeded <= 0) { + free(sess->httpRequest); + sess->httpRequest = NULL; + free(escapedStr); + return FALSE; + } + + if (round == 0) { + sizeNeeded++; /* account for terminating NUL */ + free(sess->httpRequest); + sess->httpRequest = malloc(sizeNeeded); + if (sess->httpRequest == NULL) { + free(escapedStr); + return FALSE; + } + } + } while (round++ == 0); + + free(escapedStr); + + return TRUE; +} + +enum NetDiskError +DoHTTPRequest(Session *sess, unsigned long start, unsigned long end) { +top:; + union { + srBuff srBuff; + rlrBuff rlrBuff; + } u; + Word tcpError; + Boolean wantRedirect = FALSE, gotRedirect = FALSE; + enum NetDiskError result; + + sess->responseCode = 0; + + /* Send out request */ + result = NETWORK_ERROR; + unsigned int netErrors = 0; + + TCPIPPoll(); + if (sess->tcpLoggedIn) { + tcpError = TCPIPStatusTCP(sess->ipid, &u.srBuff); + if (tcpError || toolerror() || u.srBuff.srState != TCPSESTABLISHED) { + EndTCPConnection(sess); + } + } + + u.rlrBuff.rlrBuffHandle = NULL; + +netRetry: + if (!sess->tcpLoggedIn || netErrors) { + if (StartTCPConnection(sess) != 0) + goto errorReturn; + } + tcpError = TCPIPWriteTCP(sess->ipid, sess->httpRequest, + sess->httpRequestLen, TRUE, FALSE); + if (tcpError || toolerror()) { + if (netErrors == 0) { + netErrors++; + goto netRetry; + } else { + goto errorReturn; + } + } + + /* Get response status line & headers */ + LongWord startTime = GetTick(); + do { + TCPIPPoll(); + tcpError = TCPIPReadLineTCP(sess->ipid, + 
(void*)((LongWord)"\p\r\n\r\n" | 0x80000000), + buffTypeNewHandle, (Ref)NULL, + 0xFFFFFF, &u.rlrBuff); + if (tcpError || toolerror()) { + if (netErrors == 0) { + netErrors++; + goto netRetry; + } else { + goto errorReturn; + } + } + } while (u.rlrBuff.rlrBuffCount == 0 + && GetTick() - startTime < HTTP_RESPONSE_TIMEOUT * 60); + + result = NO_RESPONSE; + if (!u.rlrBuff.rlrIsDataFlag) + goto errorReturn; + + result = INVALID_RESPONSE; + /* Response must be at least long enough for a status line & final CRLF */ + if (u.rlrBuff.rlrBuffCount < 8+1+3+1+2+2) + goto errorReturn; + + HLock(u.rlrBuff.rlrBuffHandle); + + char *response = *u.rlrBuff.rlrBuffHandle; + char *responseEnd = response + u.rlrBuff.rlrBuffCount; + /* Make response a C-string. Specifically, it will end "CR LF NUL NUL". */ + response[u.rlrBuff.rlrBuffCount - 2] = '\0'; + response[u.rlrBuff.rlrBuffCount - 1] = '\0'; + + /* Parse status line of HTTP response */ + char *endPtr; + + if (strncmp(response, "HTTP/1.", 7) != 0) + goto errorReturn; + response += 7; + if (!(*response >= '1' && *response <= '9')) + goto errorReturn; + response += 1; + if (*response != ' ') + goto errorReturn; + response += 1; + + errno = 0; + sess->responseCode = strtoul(response, &endPtr, 10); + if (errno || sess->responseCode > 999 || endPtr != response + 3) + goto errorReturn; + response += 3; + + if (*response != ' ') + goto errorReturn; + response++; + + while (*response != '\r') + response++; + response++; + + if (*response != '\n') + goto errorReturn; + response++; + + + switch((unsigned int)sess->responseCode) { + /* Redirect responses */ + case 300: case 301: case 302: case 307: case 308: + if (sess->redirectCount < MAX_REDIRECTS) { + sess->redirectCount++; + wantRedirect = TRUE; + break; + } else { + result = EXCESSIVE_REDIRECTS; + goto errorReturn; + } + + /* Full content, as desired */ + case 200: + break; + + default: + if (sess->responseCode < 400 || sess->responseCode > 599) { + result = UNSUPPORTED_RESPONSE; + } 
else { + result = sess->responseCode; + } + goto errorReturn; + } + + /* Parse response headers */ + sess->contentLength = 0; + + result = UNSUPPORTED_HEADER_VALUE; + + while (response < responseEnd - 4) { + enum ResponseHeader header = UNKNOWN_HEADER; + + if (wantRedirect) { + if (strncasecmp(response, "Location:", 9) == 0) { + response += 9; + header = LOCATION; + } + } else switch (_toupper(*response)) { + case 'C': + if (strncasecmp(response, "Content-Length:", 15) == 0) { + response += 15; + header = CONTENT_LENGTH; + } else if (strncasecmp(response, "Content-Encoding:", 17) == 0) { + response += 17; + header = CONTENT_ENCODING; + } + break; + + case 'T': + if (strncasecmp(response, "Transfer-Encoding:", 18) == 0) { + response += 18; + header = TRANSFER_ENCODING; + } + break; + } + + while (*response == ' ' || *response == '\t') + response++; + + switch (header) { + case CONTENT_LENGTH: + errno = 0; + sess->contentLength = strtoul(response, &endPtr, 10); + if (errno) + goto errorReturn; + if (sess->contentLength == 0 && !wantRedirect) + goto errorReturn; + response = endPtr; + break; + + case CONTENT_ENCODING: + case TRANSFER_ENCODING: + if (strcasecmp(response, "identity") == 0) { + response += 8; + } else { + goto errorReturn; + } + break; + + case LOCATION: + endPtr = response; + char c; + while ((c = *endPtr) != '\0' && c != '\r' && c != '\n') + endPtr++; + if (c == '\0' || c == '\n') + goto errorReturn; + while (endPtr > response + && (*(endPtr-1) == ' ' || *(endPtr-1) == '\t')) { + c = *--endPtr; + } + if (wantRedirect) { + *endPtr = '\0'; + if (SetURL(sess, response, FALSE, TRUE) != OPERATION_SUCCESSFUL) { + result = REDIRECT_ERROR; + goto errorReturn; + } + *endPtr = c; + response = endPtr; + gotRedirect = TRUE; + } + break; + + /* Unknown headers: ignored */ + case UNKNOWN_HEADER: + default: + while (*response != '\r') + response++; + break; + } + + while ((*response == ' ' || *response == '\t') && response < responseEnd) + response++; + + if 
(*response != '\r') + goto errorReturn; + response++; + if (*response != '\n') + goto errorReturn; + response++; + } + + /* Wanted redirect: Retry with new location if we got it. */ + if (wantRedirect) { + if (gotRedirect) { + DisposeHandle(u.rlrBuff.rlrBuffHandle); + goto top; + } else { + result = UNSUPPORTED_RESPONSE; + goto errorReturn; + } + } + + result = OPERATION_SUCCESSFUL; + DisposeHandle(u.rlrBuff.rlrBuffHandle); + return result; + +errorReturn: + if (u.rlrBuff.rlrBuffHandle != NULL) { + DisposeHandle(u.rlrBuff.rlrBuffHandle); + } + + /* Error condition on this TCP connection means it can't be reused. */ + EndTCPConnection(sess); + return result; +} diff --git a/http.h b/http.h new file mode 100644 index 0000000..f18d1d2 --- /dev/null +++ b/http.h @@ -0,0 +1,11 @@ +#ifndef HTTP_H +#define HTTP_H + +#include +#include "session.h" +#include "netdiskerror.h" + +Boolean BuildHTTPRequest(Session *sess, char *resourceStr); +enum NetDiskError DoHTTPRequest(Session *sess, unsigned long start, unsigned long end); + +#endif \ No newline at end of file diff --git a/readtcp.c b/readtcp.c new file mode 100644 index 0000000..78918ff --- /dev/null +++ b/readtcp.c @@ -0,0 +1,48 @@ +#pragma noroot + +#include "readtcp.h" +#include "session.h" +#include +#include +#include + +#define buffTypePointer 0x0000 /* For TCPIPReadTCP() */ +#define buffTypeHandle 0x0001 +#define buffTypeNewHandle 0x0002 + +/* Time out if no new data is received for this long */ +#define READ_TIMEOUT 15 /* seconds */ + +void InitReadTCP(Session *sess, LongWord readCount, void *readPtr) { + sess->readCount = readCount; + sess->readPtr = readPtr; + sess->lastReadTime = GetTick(); +} + + +ReadStatus TryReadTCP(Session *sess) { + rrBuff rrBuff; + + TCPIPPoll(); + sess->tcperr = TCPIPReadTCP(sess->ipid, buffTypePointer, (Ref)sess->readPtr, + sess->readCount, &rrBuff); + sess->toolerr = toolerror(); + if (sess->tcperr || sess->toolerr) { + return rsError; + } + + sess->readCount -= rrBuff.rrBuffCount; + 
sess->readPtr += rrBuff.rrBuffCount; + + if (sess->readCount == 0) { + return rsDone; + } else { + if (rrBuff.rrBuffCount != 0) { + sess->lastReadTime = GetTick(); + } else if (GetTick() - sess->lastReadTime > READ_TIMEOUT * 60) { + return rsTimedOut; + } + + return rsWaiting; + } +} diff --git a/readtcp.h b/readtcp.h new file mode 100644 index 0000000..9f1a79f --- /dev/null +++ b/readtcp.h @@ -0,0 +1,16 @@ +#ifndef READTCP_H +#define READTCP_H + +#include "session.h" + +typedef enum ReadStatus { + rsDone, + rsWaiting, + rsError, + rsTimedOut +} ReadStatus; + +void InitReadTCP(Session *sess, LongWord readCount, void *readPtr); +ReadStatus TryReadTCP(Session *sess); + +#endif diff --git a/session.h b/session.h new file mode 100644 index 0000000..c01c5e9 --- /dev/null +++ b/session.h @@ -0,0 +1,47 @@ +#ifndef SESSION_H +#define SESSION_H + +#include + +typedef struct Session { + /* Marinetti TCP connection status */ + Word ipid; + Boolean tcpLoggedIn; + + /* ReadTCP status */ + LongWord readCount; + Byte *readPtr; + LongWord lastReadTime; + + /* Marinetti error codes, both the tcperr* value and any tool error */ + Word tcperr; + Word toolerr; + + /* HTTP request to send (if non-NULL, points to malloc'd buffer) */ + char *httpRequest; + /* Length of HTTP request */ + LongWord httpRequestLen; + + /* HTTP response code */ + LongWord responseCode; + + /* IP address and TCP port of host */ + LongWord ipAddr; + Word port; + + /* Domain name or IP address of host (p-string, but also null-terminated) */ + char hostName[257]; + /* Time (GetTick) of last DNS lookup */ + LongWord dnsTime; + + /* Number of redirects followed */ + Word redirectCount; + + /* Value reported by server in Content-Length header */ + LongWord contentLength; +} Session; + + +void EndNetDiskSession(Session *sess); + +#endif diff --git a/seturl.c b/seturl.c new file mode 100644 index 0000000..ef523c5 --- /dev/null +++ b/seturl.c @@ -0,0 +1,99 @@ +#pragma noroot + +#include +#include +#include +#include 
+#include "session.h" +#include "urlparser.h" +#include "strcasecmp.h" +#include "hostname.h" +#include "http.h" +#include "tcpconnection.h" +#include "seturl.h" + +#define DEFAULT_HTTP_PORT 80 + +/* Limit to make sure sizes stay within the range of 16-bit values */ +#define MAX_URL_LENGTH 30000 + + +enum NetDiskError +SetURL(Session *sess, char *url, Boolean permissive, Boolean partialOK) { + if (strlen(url) > MAX_URL_LENGTH) { + return URL_TOO_LONG; + } + + for(unsigned int i = 0; url[i] != '\0'; i++) { + if ((unsigned char)url[i] < ' ') { + return INVALID_CHARACTER_IN_URL; + } + } + + URLParts urlParts = ParseURL(url); + + if (urlParts.errorFound) + return BAD_URL_SYNTAX; + + if (urlParts.scheme == NULL) { + if (permissive) { + urlParts.scheme = "http"; + } else { + return BAD_URL_SYNTAX; + } + } else if (strcasecmp(urlParts.scheme, "http") != 0) { + return UNSUPPORTED_URL_SCHEME; + } + + if (urlParts.username != NULL || urlParts.password != NULL) { + return AUTHENTICATION_NOT_SUPPORTED; + } + + if (urlParts.fragment != NULL) { + return FRAGMENT_NOT_SUPPORTED; + } + + unsigned long portNum; + char *endPtr; + if (urlParts.port == NULL || *urlParts.port == '\0') { + portNum = DEFAULT_HTTP_PORT; + } else { + errno = 0; + portNum = strtoul(urlParts.port, &endPtr, 10); + if (errno || *endPtr != '\0' || portNum > 0xFFFF) { + return INVALID_PORT_NUMBER; + } + } + sess->port = portNum; + + if (urlParts.host == NULL) { + if (!partialOK || sess->hostName[0] == 0) { + return NO_HOST_SPECIFIED; + } + } else if (*urlParts.host == '\0') { + return NO_HOST_SPECIFIED; + } else if (*urlParts.host == '[') { + return IPV6_NOT_SUPPORTED; + } else { + size_t len; + if ((len = strlen(urlParts.host)) > 255) { + return HOSTNAME_TOO_LONG; + } + + strcpy(&sess->hostName[1], urlParts.host); + sess->hostName[0] = len; + + if (!DoLookupName(sess)) { + return NAME_LOOKUP_FAILED; + } + } + + if (!BuildHTTPRequest(sess, urlParts.path)) { + return OUT_OF_MEMORY; + } + + /* End any existing TCP 
# ifdef __ORCAC__
# pragma noroot
# endif

#include <stddef.h>
#include <ctype.h>

/*
 * Compare two C strings, ignoring case.
 *
 * Returns zero if the strings are equal when case is ignored, a negative
 * value if s1 sorts before s2, and a positive value if it sorts after.
 * The result is the difference of the first pair of characters that
 * differ after both are converted to lower case.
 */
int strcasecmp(const char *s1, const char *s2)
{
    int c1, c2;

    do {
        /* Go through unsigned char: tolower is undefined for negative char. */
        c1 = tolower((unsigned char)*s1++);
        c2 = tolower((unsigned char)*s2++);
    } while (c1 != '\0' && c1 == c2);

    return c1 - c2;
}


/*
 * Compare at most n characters of two C strings, ignoring case.
 *
 * Returns zero if the first n characters (or the whole strings, if they
 * end sooner) are equal when case is ignored; otherwise the sign of the
 * result gives the ordering, as for strcasecmp.  With n == 0 the result
 * is always zero.
 */
int strncasecmp(const char *s1, const char *s2, size_t n)
{
    int c1 = 0, c2 = 0;

    while (n-- > 0) {
        c1 = tolower((unsigned char)*s1++);
        c2 = tolower((unsigned char)*s2++);
        if (c1 == '\0' || c1 != c2)
            break;
    }

    return c1 - c2;
}
+ */ +enum NetDiskError StartTCPConnection(Session *sess) { + Word tcperr; + srBuff mySRBuff; + LongWord initialTime; + + /* End any existing TCP connection */ + EndTCPConnection(sess); + + sess->ipid = + TCPIPLogin(userid(), sess->ipAddr, sess->port, 0, 0x40); + if (toolerror()) + return NETWORK_ERROR; + + tcperr = TCPIPOpenTCP(sess->ipid); + if (toolerror()) { + TCPIPLogout(sess->ipid); + return NETWORK_ERROR; + } else if (tcperr != tcperrOK) { + TCPIPLogout(sess->ipid); + return NO_RESPONSE; + } + + initialTime = GetTick(); + do { + TCPIPPoll(); + TCPIPStatusTCP(sess->ipid, &mySRBuff); + } while (mySRBuff.srState == TCPSSYNSENT && GetTick()-initialTime < 15*60); + if (mySRBuff.srState != TCPSESTABLISHED) { + TCPIPAbortTCP(sess->ipid); + TCPIPLogout(sess->ipid); + return NO_RESPONSE; + } + + sess->tcpLoggedIn = TRUE; + return 0; +} + +void EndTCPConnection(Session *sess) { + if (sess->tcpLoggedIn) { + TCPIPPoll(); + TCPIPAbortTCP(sess->ipid); + TCPIPLogout(sess->ipid); + sess->ipid = 0; + sess->tcpLoggedIn = FALSE; + } +} diff --git a/tcpconnection.h b/tcpconnection.h new file mode 100644 index 0000000..877f6bd --- /dev/null +++ b/tcpconnection.h @@ -0,0 +1,10 @@ +#ifndef TCPCONNECTION_H +#define TCPCONNECTION_H + +#include "session.h" +#include "netdiskerror.h" + +enum NetDiskError StartTCPConnection(Session *sess); +void EndTCPConnection(Session *sess); + +#endif diff --git a/urlparser.c b/urlparser.c new file mode 100644 index 0000000..75ca10c --- /dev/null +++ b/urlparser.c @@ -0,0 +1,125 @@ +#ifdef __ORCAC__ +# pragma noroot +#endif + +#include "urlparser.h" +#include + +/* + * Parse the URL and break it up into its component parts. + * + * This handles http URLs and other schemes with the same syntax. + * + * It can also handle relative URLs containing an absolute-path reference + * (i.e. starting with /) and URL-like addresses without the scheme prefix. + * In these cases, the portions of the URL not provided will be NULL. 
#include <string.h>

/*
 * Declarations from urlparser.h, repeated under the header's own include
 * guard so that this block is self-contained; they are skipped when the
 * header has already been included.
 */
#ifndef URLPARSER_H
#define URLPARSER_H

#ifdef __ORCAC__
# include <types.h>
#else
typedef _Bool Boolean;
#endif

typedef struct URLParts {
    char *scheme;
    char *username;
    char *password;
    char *host;
    char *port;
    char *path; /* Omits leading '/'. Includes query, if any. */
    char *fragment;

    Boolean errorFound;
} URLParts;


URLParts ParseURL(char *url);

#endif

/*
 * Parse the URL and break it up into its component parts.
 *
 * This handles http URLs and other schemes with the same syntax
 * (scheme://user:password@host:port/path#fragment).
 *
 * It can also handle relative URLs containing an absolute-path reference
 * (i.e. starting with /) and URL-like addresses without the scheme prefix.
 * In these cases, the portions of the URL not provided will be NULL.
 *
 * The fragment starts at the first '#' in the URL.  An IPv6 literal host
 * is returned with its surrounding brackets.
 *
 * This modifies the original string passed in, breaking it up into
 * components: separators are overwritten with NUL and the returned
 * pointers refer into that string (path may instead point to a constant
 * empty string when the URL has none).
 *
 * errorFound is set if the URL starts with "//", if a scheme is not
 * followed by "//", or if an IPv6 literal lacks its closing ']'.
 */
URLParts ParseURL(char *url) {
    char *sep;
    URLParts urlParts = {0};

    /* Everything after the first '#' is the fragment. */
    sep = strchr(url, '#');
    if (sep) {
        *sep = '\0';
        urlParts.fragment = sep + 1;
    }

    /* Detect relative URL (absolute-path reference only) */
    if (url[0] == '/') {
        if (url[1] != '/') {
            urlParts.path = url + 1;
            return urlParts;
        } else {
            urlParts.errorFound = 1;
            return urlParts;
        }
    }

    sep = strchr(url, ':');
    if (sep) {
        urlParts.scheme = url;
        *sep = '\0';
        url = sep + 1;
    }

    /* A scheme must be followed by "//" and an authority. */
    if (urlParts.scheme != NULL) {
        if (strncmp(url, "//", 2) == 0) {
            url += 2;
        } else {
            urlParts.errorFound = 1;
            return urlParts;
        }
    }

    /* Split off the path first so later searches stay in the authority. */
    urlParts.path = strchr(url, '/');
    if (urlParts.path == NULL) {
        urlParts.path = "";
    } else {
        *urlParts.path = '\0';
        urlParts.path++;
    }

    sep = strchr(url, '@');
    if (sep) {
        *sep = '\0';
        urlParts.username = url;

        urlParts.password = strchr(url, ':');
        if (urlParts.password != NULL) {
            *urlParts.password = '\0';
            urlParts.password++;
        }

        url = sep + 1;
    }

    urlParts.host = url;

    /* Handle IPv6 address syntax: skip past "[...]" before seeking a port */
    if (*url == '[') {
        sep = strchr(url, ']');
        if (sep) {
            url = sep + 1;
        } else {
            urlParts.errorFound = 1;
            return urlParts;
        }
    }

    sep = strchr(url, ':');
    if (sep) {
        *sep = '\0';
        urlParts.port = sep + 1;
    }

    return urlParts;
}
#ifdef URLPARSER_TEST
#include <stdio.h>

/* Text to print for a component: the component itself, or "(NULL)". */
static const char *PartOrNull(const char *part)
{
    return part ? part : "(NULL)";
}

/*
 * Test driver, built only when URLPARSER_TEST is defined: parses the URL
 * given as the first command-line argument and prints each component.
 * Returns 1 if no argument was supplied.
 */
int main(int argc, char **argv)
{
    if (argc < 2)
        return 1;

    URLParts parsed = ParseURL(argv[1]);

    printf("scheme:   %s\n", PartOrNull(parsed.scheme));
    printf("username: %s\n", PartOrNull(parsed.username));
    printf("password: %s\n", PartOrNull(parsed.password));
    printf("host:     %s\n", PartOrNull(parsed.host));
    printf("port:     %s\n", PartOrNull(parsed.port));
    printf("path:     %s\n", PartOrNull(parsed.path));
    printf("fragment: %s\n", PartOrNull(parsed.fragment));

    if (parsed.errorFound) {
        printf("Error found\n");
    }
}
#endif

#define USER_AGENT_VERSION "1.0b1" /* reported in User-Agent string */