Oliver Schmidt 41de6a76bd Allow URLs and URL selectors > 255 chars.
Note: The URL selector is stored in the output_buffer, which is currently 520 bytes. Besides the whole URL (apart from a potential "http://"), the 'get' and 'http_preamble' strings have to fit into that buffer. Therefore URLs mustn't exceed 450 chars. However, we omit a check to avoid a further increase in code size, as most of the time URLs are known to be much shorter anyway. If URLs might become long, we leave it up to the user to check their length.
2018-10-30 22:02:47 +01:00

298 lines
5.8 KiB

; routine for parsing a URL
.include ""
.include "../inc/"
.include "../inc/"
.import output_buffer
.import ip65_error
.import parser_init
.import parser_skip_next
.import dns_set_hostname
.import dns_resolve
.import parse_integer
.import dns_ip
.export url_ip
.export url_port
.export url_selector
.export url_resource_type
.export url_parse
; search_string is another name for ptr1. It is used with (zp),y addressing
; further down, so ptr1 has to be a zero-page pointer (its definition is not
; visible in this file - presumably it comes from one of the includes).
search_string = ptr1
; the request / selector string is assembled in place inside the imported output_buffer
selector_buffer = output_buffer
; address of the URL string being parsed (stored from AX on entry to the parser)
url_string: .res 2
url_ip: .res 4 ; will be set with ip address of host in url
url_port: .res 2 ; will be set with port number of url
url_selector: .res 2 ; will be set with address of selector part of URL
; protocol chosen for the current URL: holds one of the url_type_* values below
url_type: .res 1
; for gopher URLs: the first byte after the slash that follows the host;
; cleared to 0 at the start of every parse
url_resource_type: .res 1
; values stored in url_type (url_type_unknown is not referenced in the visible code)
url_type_unknown = 0
url_type_gopher = 1
url_type_http = 2
; single-byte offsets, loaded into Y to index the source (ptr1) and destination (ptr2)
; pointers; when one wraps to 0 the code bumps the high byte of the matching pointer
src_ptr: .res 1
dest_ptr: .res 1
; parses a URL into a form that makes it easy to retrieve the specified resource
; caution - the resulting selector part of URL must fit into the output_buffer !!!
; inputs:
; AX = address of URL string
; any control character (i.e. <$20) is treated as 'end of string', e.g. a CR or LF, as well as $00
; outputs:
; sec if a malformed url, otherwise:
; url_ip = ip address of host in url
; url_port = port number of url
; url_selector= address of selector part of URL
;
; NOTE(review): url_parse is exported above, but no "url_parse:" label is visible
; in this listing. The same is true for every @label and for skip_to_hostname:
; they are referenced by branches/calls below yet never defined in the visible
; lines. Several loops also lack an index update. The notes below mark each spot;
; all of them should be confirmed against a known-good copy of this file.
;
; --- remember the URL and preset the defaults: HTTP, port 80, no resource type ---
stax url_string
ldy #url_type_http
sty url_type
ldy #80
sty url_port
ldy #0
sty url_port+1
sty url_resource_type
; look for a protocol specifier; carry set on return is treated as "none present",
; in which case the whole URL string is handed on as the hostname
jsr skip_to_hostname
bcc :+
ldax url_string
jmp @no_protocol_specifier
: ldax url_string
stax search_string
; read the first character of the URL to pick the protocol.
; This relies on Y = 0: Y was last loaded with 0 before the skip_to_hostname call;
; whether that call preserves Y is not visible here - TODO confirm.
lda (search_string),y
cmp #'g'
beq @gopher
cmp #'G'
beq @gopher
cmp #'h'
beq @protocol_set
cmp #'H'
beq @protocol_set
; NOTE(review): any other first character falls through to here. As listed, A still
; holds that character when it is written to ip65_error, and execution then runs on
; into the gopher setup. Presumably an error-code load, the @exit_with_error /
; @exit_with_sec labels and a sec + rts belong around this store - TODO restore.
sta ip65_error
; gopher defaults: type = gopher, port 70 (presumably the @gopher target)
lda #url_type_gopher
sta url_type
lda #70
sta url_port
; presumably the @protocol_set target: position on the hostname again
jsr skip_to_hostname
; now pointing at hostname
bcs @exit_with_error
; presumably the @no_protocol_specifier target: AX = hostname, resolve it via DNS
jsr dns_set_hostname
bcs @exit_with_sec
jsr dns_resolve
bcc :+
; resolve failed: as listed, whatever dns_resolve left in A is recorded as the error
sta ip65_error
jmp @exit_with_sec
: ; copy IP address
ldx #3
; NOTE(review): X is never changed inside this loop as listed, and bpl then tests the
; sign of the byte just loaded instead of the counter. A "dex" before the bpl looks
; to be missing - TODO confirm.
: lda dns_ip,x
sta url_ip,x
bpl :-
; restart the parser at the hostname so an optional ":port" can be searched for
jsr skip_to_hostname
; skip over next colon
ldax #colon
jsr parser_skip_next
bcs @no_port_in_url
; AX now point at first thing past a colon - should be a number:
jsr parse_integer
stax url_port
; NOTE(review): @no_port_in_url has no visible definition; presumably it labels the
; next instruction so that URLs without a port keep the default set earlier.
; skip over next slash
ldax #slash
jsr parser_skip_next
bcc :+
; No slash at all after hostname -> empty selector
ldax #zero
: ; AX now pointing at selector
; ptr1 = source (rest of the URL), ptr2 = destination (selector_buffer)
stax ptr1
ldax #selector_buffer
stax ptr2
lda #0
sta src_ptr
sta dest_ptr
lda url_type
cmp #url_type_gopher
bne @not_gopher
; first byte after / in a gopher url is the resource type
ldy src_ptr
lda (ptr1),y
; a $00 here means there is nothing after the slash: keep resource type 0
beq @start_of_selector
sta url_resource_type
inc src_ptr
jmp @start_of_selector
; NOTE(review): @not_gopher has no visible definition; presumably it labels the next
; instruction (A still holds url_type at that point).
cmp #url_type_http
beq @build_http_request
jmp @done ; if it's not gopher or http, we don't know how to build a selector
; NOTE(review): @build_http_request has no visible definition; presumably it labels
; the next instruction. The loop copies the get_length bytes at "get" to the start of
; the buffer, last byte first, and leaves dest_ptr at the index of the last byte.
ldy #get_length-1
sty dest_ptr
; NOTE(review): Y is never changed inside this loop as listed; a "dey" before the bpl
; looks to be missing - TODO confirm.
: lda get,y
sta (ptr2),y
bpl :-
; step dest_ptr past the copied prefix and store a '/' as the first selector byte
; NOTE(review): @start_of_selector has no visible definition, and SOURCE does not
; show whether it belongs here or at the copy loop below - TODO confirm.
lda #'/'
inc dest_ptr
jmp @save_first_byte_of_selector
; selector copy loop (presumably the @copy_one_byte target): fetch the next URL byte
ldy src_ptr
lda (ptr1),y
cmp #$20
bcc @end_of_selector ; any control char (including CR,LF, and $00) should be treated as end of URL
inc src_ptr
bne @save_first_byte_of_selector
; src_ptr wrapped to 0: move the source pointer on by one page
inc ptr1+1
; presumably the @save_first_byte_of_selector target: store A at the output offset
ldy dest_ptr
sta (ptr2),y
inc dest_ptr
bne @copy_one_byte
; dest_ptr wrapped to 0: move the destination pointer on by one page
inc ptr2+1
jmp @copy_one_byte
; presumably the @end_of_selector target: X = how many CR/LF pairs terminate the request
ldx #1 ; number of CRLF at end of gopher request
lda url_type
cmp #url_type_http
bne @final_crlf
; now the HTTP version number & Host: field
ldx #0
; append http_preamble up to (not including) its terminating $00
: lda http_preamble,x
beq :++
ldy dest_ptr
sta (ptr2),y
inc dest_ptr
bne :+
inc ptr2+1
: inx
bne :--
: ; now copy the host field
; NOTE(review): this loads the (possibly advanced) high byte of the output pointer,
; but nothing visible saves it across the call below, which returns a new AX.
; Further down the same A value is stored to both ptr2 and ptr2+1. Presumably a
; pha here and a pla before "sta ptr2+1" are missing - TODO confirm.
lda ptr2+1
jsr skip_to_hostname
; AX now pointing at hostname
stax ptr1
ldax #<selector_buffer
sta ptr2
sta ptr2+1
lda #0
sta src_ptr
; hostname copy loop (presumably the @copy_one_byte_of_hostname target):
; copies bytes until $00, ':' or '/'
ldy src_ptr
lda (ptr1),y
beq @end_of_hostname
cmp #':'
beq @end_of_hostname
cmp #'/'
beq @end_of_hostname
inc src_ptr
ldy dest_ptr
sta (ptr2),y
inc dest_ptr
bne @copy_one_byte_of_hostname
inc ptr2+1
jmp @copy_one_byte_of_hostname
; presumably the @end_of_hostname target
ldx #2 ; number of CRLF at end of HTTP request
; presumably the @final_crlf target: append CR, LF at the current output offset
; NOTE(review): as listed, Y is not advanced after either store, each bne tests the
; constant just loaded (never zero, so always taken), and X is never decremented,
; which makes the final branch unconditional. An "iny" after each store and a "dex"
; before the last bne look to be missing - TODO confirm.
ldy dest_ptr
lda #$0d
sta (ptr2),y
bne :+
inc ptr2+1
: lda #$0a
sta (ptr2),y
bne :+
inc ptr2+1
: sty dest_ptr
bne @final_crlf
; presumably the @done target: terminate the string with $00 and publish its address
lda #$00
sta (ptr2),y
ldax #selector_buffer
stax url_selector
; NOTE(review): no clc / rts is visible here, so as listed execution runs on into the
; code below. The header promises carry clear on success - TODO confirm and restore.
;
; presumably the body of skip_to_hostname (called with jsr above, no label visible):
; restarts the parser on the saved URL and skips past "://". It ends with a jmp, so
; the caller receives AX and carry exactly as parser_skip_next returns them.
ldax url_string
jsr parser_init
ldax #colon_slash_slash
jmp parser_skip_next
; Constant strings used by the URL parser.
; The code refers to every label below (get, http_preamble, colon_slash_slash,
; slash, zero, colon), so each one must be defined on the data it names.

; request-line prefix copied to the start of an HTTP request (get_length bytes)
get:
.byte "GET "
get_length = 4

; text appended after the selector of an HTTP request; the copy loop stops at the
; terminating 0, and the host name is then written directly after "Host: "
http_preamble:
.byte " HTTP/1.0",$0d,$0a
.byte "User-Agent: IP65/0.6502",$0d,$0a
.byte "Connection: close",$0d,$0a
.byte "Host: ",0

; search strings handed to parser_skip_next (presumably zero-terminated - the
; layout only makes sense that way). Three strings overlap and share a terminator:
;   colon_slash_slash -> "://",0
;   slash             ->   "/",0
;   zero              ->      0    (also used as the empty selector)
colon_slash_slash:
.byte ":/"
slash:
.byte "/"
zero:
.byte 0

; search string for the port separator
colon:
.byte ":",0
; -- LICENSE FOR url.s --
; The contents of this file are subject to the Mozilla Public License
; Version 1.1 (the "License"); you may not use this file except in
; compliance with the License. You may obtain a copy of the License at
; Software distributed under the License is distributed on an "AS IS"
; basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
; License for the specific language governing rights and limitations
; under the License.
; The Original Code is ip65.
; The Initial Developer of the Original Code is Jonno Downes,
; Portions created by the Initial Developer are Copyright (C) 2009
; Jonno Downes. All Rights Reserved.