diff --git a/lib/isc/Makefile.am b/lib/isc/Makefile.am index 0c1bb816eb..5e7164661c 100644 --- a/lib/isc/Makefile.am +++ b/lib/isc/Makefile.am @@ -123,6 +123,7 @@ libisc_la_SOURCES = \ $(libisc_la_HEADERS) \ $(pk11_HEADERS) \ $(pkcs11_HEADERS) \ + netmgr/http.c \ netmgr/netmgr-int.h \ netmgr/netmgr.c \ netmgr/tcp.c \ @@ -132,6 +133,7 @@ libisc_la_SOURCES = \ netmgr/uv-compat.c \ netmgr/uv-compat.h \ netmgr/uverr2result.c \ + netmgr/url-parser/url_parser.c \ unix/pk11_api.c \ unix/dir.c \ unix/errno.c \ diff --git a/lib/isc/netmgr/http.c b/lib/isc/netmgr/http.c new file mode 100644 index 0000000000..5efcea967b --- /dev/null +++ b/lib/isc/netmgr/http.c @@ -0,0 +1,599 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. 
+ */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "netmgr-int.h" +#include "url-parser/url_parser.h" + +#define AUTHEXTRA 7 + +typedef struct { + char *uri; + struct http_parser_url u; + + char *authority; + size_t authoritylen; + char *path; + + size_t pathlen; + int32_t stream_id; + isc_region_t *postdata; + size_t postdata_pos; +} http2_stream; + +typedef struct { + isc_mem_t *mctx; + nghttp2_session *ngsession; + http2_stream *stream; + isc_nmhandle_t *handle; + + uint8_t buf[65535]; + size_t bufsize; + uint8_t rbuf[65535]; + size_t rbufsize; + + isc_nm_recv_cb_t cb; + void *cbarg; + + SSL_CTX *ctx; + bool reading; +} http2_session; + +static bool +http2_do_bio(http2_session *session); + +static void +writecb(isc_nmhandle_t *handle, isc_result_t result, void *ptr); + +static isc_result_t +get_http2_stream(isc_mem_t *mctx, http2_stream **streamp, const char *uri, + uint16_t *port) { + INSIST(streamp != NULL && *streamp == NULL); + INSIST(uri != NULL); + INSIST(port != NULL); + + int rv; + http2_stream *stream = isc_mem_get(mctx, sizeof(http2_stream)); + stream->uri = isc_mem_strdup(mctx, uri); + rv = http_parser_parse_url(stream->uri, strlen(stream->uri), 0, + &stream->u); + if (rv != 0) { + isc_mem_put(mctx, stream, sizeof(http2_stream)); + isc_mem_free(mctx, stream->uri); + return (ISC_R_FAILURE); + } + stream->stream_id = -1; + + stream->authoritylen = stream->u.field_data[UF_HOST].len; + stream->authority = isc_mem_get(mctx, stream->authoritylen + AUTHEXTRA); + memcpy(stream->authority, &uri[stream->u.field_data[UF_HOST].off], + stream->u.field_data[UF_HOST].len); + if (stream->u.field_set & (1 << UF_PORT)) { + stream->authoritylen += (size_t)snprintf( + stream->authority + stream->u.field_data[UF_HOST].len, + AUTHEXTRA, ":%u", stream->u.port); + } + + /* If we don't have path in URI, we use "/" as path. 
*/ + stream->pathlen = 1; + if (stream->u.field_set & (1 << UF_PATH)) { + stream->pathlen = stream->u.field_data[UF_PATH].len; + } + if (stream->u.field_set & (1 << UF_QUERY)) { + /* +1 for '?' character */ + stream->pathlen += + (size_t)(stream->u.field_data[UF_QUERY].len + 1); + } + + stream->path = isc_mem_get(mctx, stream->pathlen); + if (stream->u.field_set & (1 << UF_PATH)) { + memcpy(stream->path, &uri[stream->u.field_data[UF_PATH].off], + stream->u.field_data[UF_PATH].len); + } else { + stream->path[0] = '/'; + } + if (stream->u.field_set & (1 << UF_QUERY)) { + stream->path[stream->pathlen - + stream->u.field_data[UF_QUERY].len - 1] = '?'; + memcpy(stream->path + stream->pathlen - + stream->u.field_data[UF_QUERY].len, + &uri[stream->u.field_data[UF_QUERY].off], + stream->u.field_data[UF_QUERY].len); + } + + if (!(stream->u.field_set & (1 << UF_PORT))) { + *port = 443; + } else { + *port = stream->u.port; + } + *streamp = stream; + return (ISC_R_SUCCESS); +} + +static void +put_http2_stream(isc_mem_t *mctx, http2_stream *stream) { + isc_mem_put(mctx, stream->path, stream->pathlen); + isc_mem_put(mctx, stream->authority, stream->authoritylen + AUTHEXTRA); + isc_mem_put(mctx, stream, sizeof(http2_stream)); +} + +static void +delete_http2_session(http2_session *session) { + if (session->handle != NULL) { + isc_nmhandle_unref(session->handle); + session->handle = NULL; + } + if (session->ngsession != NULL) { + nghttp2_session_del(session->ngsession); + session->ngsession = NULL; + } + if (session->stream != NULL) { + put_http2_stream(session->mctx, session->stream); + session->stream = NULL; + } + + isc_mem_putanddetach(&session->mctx, session, sizeof(http2_session)); +} + +#if 0 +/* XXXWPK do we need these callback? 
We might want to verify headers */ +on_header_callback(nghttp2_session *ngsession, const nghttp2_frame *frame, + const uint8_t *name, size_t namelen, const uint8_t *value, + size_t valuelen, uint8_t flags, void *user_data) { + http2_session *session = (http2_session *)user_data; + UNUSED(ngsession); + UNUSED(flags); + switch (frame->hd.type) { + case NGHTTP2_HEADERS: + if (frame->headers.cat == NGHTTP2_HCAT_RESPONSE && + session->stream->stream_id == frame->hd.stream_id) + { + break; + } + } + return (0); +} + +static int +on_begin_headers_callback(nghttp2_session *ngsession, const nghttp2_frame *frame, + void *user_data) { + http2_session *session = (http2_session *)user_data; + UNUSED(ngsession); + switch (frame->hd.type) { + case NGHTTP2_HEADERS: + if (frame->headers.cat == NGHTTP2_HCAT_RESPONSE && + session->stream->stream_id == frame->hd.stream_id) + { + /* XXX */ + } + break; + } + return (0); +} + +on_frame_recv_callback(nghttp2_session *ngsession, const nghttp2_frame *frame, + void *user_data) { + http2_session *session = (http2_session *)user_data; + UNUSED(ngsession); + switch (frame->hd.type) { + case NGHTTP2_HEADERS: + if (frame->headers.cat == NGHTTP2_HCAT_RESPONSE && + session->stream->stream_id == frame->hd.stream_id) + { + /* XXX */ + } + break; + } + return (0); +} +#endif + +static int +on_data_chunk_recv_callback(nghttp2_session *ngsession, uint8_t flags, + int32_t stream_id, const uint8_t *data, size_t len, + void *user_data) { + http2_session *session = (http2_session *)user_data; + UNUSED(ngsession); + UNUSED(flags); + if (session->stream->stream_id == stream_id) { + /* TODO buffer overrun! 
*/ + memmove(session->rbuf + session->rbufsize, data, len); + session->rbufsize += len; + } + return (0); +} + +static int +on_stream_close_callback(nghttp2_session *ngsession, int32_t stream_id, + uint32_t error_code, void *user_data) { + UNUSED(error_code); + + http2_session *session = (http2_session *)user_data; + int rv; + if (session->stream->stream_id == stream_id) { + rv = nghttp2_session_terminate_session(ngsession, + NGHTTP2_NO_ERROR); + if (rv != 0) { + return (NGHTTP2_ERR_CALLBACK_FAILURE); + } + } + session->cb(NULL, ISC_R_SUCCESS, + &(isc_region_t){ session->rbuf, session->rbufsize }, + session->cbarg); + /* XXXWPK TODO we need to close the session */ + + return (0); +} + +#ifndef OPENSSL_NO_NEXTPROTONEG +/* NPN TLS extension client callback. We check that server advertised + the HTTP/2 protocol the nghttp2 library supports. If not, exit + the program. */ +static int +select_next_proto_cb(SSL *ssl, unsigned char **out, unsigned char *outlen, + const unsigned char *in, unsigned int inlen, void *arg) { + UNUSED(ssl); + UNUSED(arg); + + if (nghttp2_select_next_protocol(out, outlen, in, inlen) <= 0) { + /* TODO */ + } + return (SSL_TLSEXT_ERR_OK); +} +#endif /* !OPENSSL_NO_NEXTPROTONEG */ + +/* Create SSL_CTX. 
*/ +static SSL_CTX * +create_ssl_ctx(void) { + SSL_CTX *ssl_ctx; + ssl_ctx = SSL_CTX_new(SSLv23_client_method()); + if (!ssl_ctx) { + /* TODO */ + abort(); + } + SSL_CTX_set_options( + ssl_ctx, SSL_OP_ALL | SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | + SSL_OP_NO_COMPRESSION | + SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION); +#ifndef OPENSSL_NO_NEXTPROTONEG + SSL_CTX_set_next_proto_select_cb(ssl_ctx, select_next_proto_cb, NULL); +#endif /* !OPENSSL_NO_NEXTPROTONEG */ + +#if OPENSSL_VERSION_NUMBER >= 0x10002000L + SSL_CTX_set_alpn_protos(ssl_ctx, (const unsigned char *)"\x02h2", 3); +#endif /* OPENSSL_VERSION_NUMBER >= 0x10002000L */ + + return (ssl_ctx); +} + +static void +initialize_nghttp2_session(http2_session *session) { + nghttp2_session_callbacks *callbacks; + + nghttp2_session_callbacks_new(&callbacks); + + nghttp2_session_callbacks_set_on_data_chunk_recv_callback( + callbacks, on_data_chunk_recv_callback); + + nghttp2_session_callbacks_set_on_stream_close_callback( + callbacks, on_stream_close_callback); + +#if 0 +/* Do we need it ? 
*/ + nghttp2_session_callbacks_set_on_header_callback(callbacks, + on_header_callback); + + nghttp2_session_callbacks_set_on_begin_headers_callback( + callbacks, on_begin_headers_callback); + + nghttp2_session_callbacks_set_on_frame_recv_callback( + callbacks, on_frame_recv_callback); + +#endif + + nghttp2_session_client_new(&session->ngsession, callbacks, session); + + nghttp2_session_callbacks_del(callbacks); +} + +static void +send_client_connection_header(http2_session *session) { + nghttp2_settings_entry iv[1] = { + { NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 100 } + }; + int rv; + + rv = nghttp2_submit_settings(session->ngsession, NGHTTP2_FLAG_NONE, iv, + 1); + if (rv != 0) { + /* TODO */ + } + http2_do_bio(session); +} + +#define MAKE_NV(NAME, VALUE, VALUELEN) \ + { \ + (uint8_t *)NAME, (uint8_t *)VALUE, sizeof(NAME) - 1, VALUELEN, \ + NGHTTP2_NV_FLAG_NONE \ + } + +#define MAKE_NV2(NAME, VALUE) \ + { \ + (uint8_t *)NAME, (uint8_t *)VALUE, sizeof(NAME) - 1, \ + sizeof(VALUE) - 1, NGHTTP2_NV_FLAG_NONE \ + } + +static ssize_t +post_read_callback(nghttp2_session *ngsession, int32_t stream_id, uint8_t *buf, + size_t length, uint32_t *data_flags, + nghttp2_data_source *source, void *user_data) { + http2_session *session = (http2_session *)user_data; + UNUSED(ngsession); + UNUSED(source); + + if (session->stream->stream_id == stream_id) { + size_t len = session->stream->postdata->length - + session->stream->postdata_pos; + if (len > length) { + len = length; + } + memcpy(buf, + session->stream->postdata->base + + session->stream->postdata_pos, + len); + session->stream->postdata_pos += len; + if (session->stream->postdata_pos == + session->stream->postdata->length) { + *data_flags |= NGHTTP2_DATA_FLAG_EOF; + } + return (len); + } + return (0); +} + +/* Send HTTP request to the remote peer */ +static isc_result_t +submit_request(http2_session *session) { + int32_t stream_id; + http2_stream *stream = session->stream; + char *uri = stream->uri; + struct http_parser_url 
*u = &stream->u; + char p[64]; + snprintf(p, 64, "%u", stream->postdata->length); + + nghttp2_nv hdrs[] = { + MAKE_NV2(":method", "POST"), + MAKE_NV(":scheme", &uri[u->field_data[UF_SCHEMA].off], + u->field_data[UF_SCHEMA].len), + MAKE_NV(":authority", stream->authority, stream->authoritylen), + MAKE_NV(":path", stream->path, stream->pathlen), + MAKE_NV2("content-type", "application/dns-message"), + MAKE_NV2("accept", "application/dns-message"), + MAKE_NV("content-length", p, strlen(p)) + }; + + nghttp2_data_provider dp = { .read_callback = post_read_callback }; + stream_id = nghttp2_submit_request(session->ngsession, NULL, hdrs, 7, + &dp, stream); + if (stream_id < 0) { + return (ISC_R_FAILURE); + } + stream->stream_id = stream_id; + http2_do_bio(session); + return (ISC_R_SUCCESS); +} + +/* + * read callback from TLS socket. + */ +static void +readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, + void *data) { + UNUSED(handle); + UNUSED(result); + http2_session *session = (http2_session *)data; + + ssize_t readlen = nghttp2_session_mem_recv( + session->ngsession, region->base, region->length); + + if (readlen < 0) { + delete_http2_session(session); + /* TODO callback! 
*/ + return; + } + if (readlen < region->length) { + INSIST(session->bufsize == 0); + INSIST(region->length - readlen < 65535); + memmove(session->buf, region->base, region->length - readlen); + session->bufsize = region->length - readlen; + isc_nm_pauseread(session->handle); + } + + /* We might have something to receive or send, do IO */ + http2_do_bio(session); +} + +static bool +http2_do_bio(http2_session *session) { + if (nghttp2_session_want_read(session->ngsession) == 0 && + nghttp2_session_want_write(session->ngsession) == 0) + { + delete_http2_session(session); + return (false); + } + + if (nghttp2_session_want_read(session->ngsession) != 0) { + if (!session->reading) { + /* We have not yet started reading from this handle */ + isc_nm_read(session->handle, readcb, session); + session->reading = true; + } else if (session->bufsize > 0) { + /* Leftover data in the buffer, use it */ + size_t readlen = nghttp2_session_mem_recv( + session->ngsession, session->buf, + session->bufsize); + if (readlen == session->bufsize) { + session->bufsize = 0; + } else { + memmove(session->buf, session->buf + readlen, + session->bufsize - readlen); + session->bufsize -= readlen; + } + http2_do_bio(session); + return (false); + } else { + /* Resume reading, it's idempotent, wait for more */ + isc_nm_resumeread(session->handle); + } + } else { + /* We don't want more data, stop reading for now */ + isc_nm_pauseread(session->handle); + } + + if (nghttp2_session_want_write(session->ngsession) != 0) { + const uint8_t *data; + + /* + * XXXWPK TODO + * This function may produce very small byte string. If that + * is the case, and application disables Nagle algorithm + * (``TCP_NODELAY``), then writing this small chunk leads to + * very small packet, and it is very inefficient. An + * application should be responsible to buffer up small chunks + * of data as necessary to avoid this situation. 
+ */ + size_t sz = nghttp2_session_mem_send(session->ngsession, &data); + isc_region_t region; + region.base = malloc(sz); + region.length = sz; + memcpy(region.base, data, sz); + isc_result_t result = isc_nm_send(session->handle, ®ion, + writecb, session); + if (result != ISC_R_SUCCESS) { + abort(); + } + return (true); + } + return (false); +} + +static void +writecb(isc_nmhandle_t *handle, isc_result_t result, void *ptr) { + UNUSED(handle); + UNUSED(result); + http2_session *session = (http2_session *)ptr; + http2_do_bio(session); +} + +static void +connect_cb(isc_nmhandle_t *handle, isc_result_t result, void *arg) { + http2_session *session = (http2_session *)arg; + if (result != ISC_R_SUCCESS) { + delete_http2_session(session); + return; + } + session->handle = handle; + isc_nmhandle_ref(handle); + +#if 0 +/* TODO H2 */ +#ifndef OPENSSL_NO_NEXTPROTONEG + SSL_get0_next_proto_negotiated(ssl, &alpn, &alpnlen); +#endif +#if OPENSSL_VERSION_NUMBER >= 0x10002000L + if (alpn == NULL) { + SSL_get0_alpn_selected(ssl, &alpn, &alpnlen); + } +#endif + + if (alpn == NULL || alpnlen != 2 || memcmp("h2", alpn, 2) != 0) + { + fprintf(stderr, "h2 is not negotiated\n"); + delete_http2_session(session); + return; + } +#endif + initialize_nghttp2_session(session); + send_client_connection_header(session); + submit_request(session); + http2_do_bio(session); +} + +isc_result_t +isc_nm_doh_request(isc_nm_t *mgr, const char *uri, isc_region_t *message, + isc_nm_recv_cb_t cb, void *cbarg, SSL_CTX *ctx) { + uint16_t port; + char *host; + http2_session *session; + struct addrinfo hints; + struct addrinfo *res; + isc_sockaddr_t local, peer; + isc_result_t result; + + if (ctx == NULL) { + ctx = create_ssl_ctx(); + } + + session = isc_mem_get(mgr->mctx, sizeof(http2_session)); + *session = (http2_session){ .cb = cb, .cbarg = cbarg, .ctx = ctx }; + isc_mem_attach(mgr->mctx, &session->mctx); + + result = get_http2_stream(mgr->mctx, &session->stream, uri, &port); + if (result != 
ISC_R_SUCCESS) { + delete_http2_session(session); + return (result); + } + session->stream->postdata = message; + session->stream->postdata_pos = 0; + + /* TODO do this properly!!! */ + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_family = PF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags |= AI_CANONNAME; + host = strndup( + &session->stream + ->uri[session->stream->u.field_data[UF_HOST].off], + session->stream->u.field_data[UF_HOST].len); + + int s = getaddrinfo(host, NULL, &hints, &res); + + free(host); + if (s != 0) { + return (ISC_R_FAILURE); + } + + isc_sockaddr_fromsockaddr(&peer, res->ai_addr); + isc_sockaddr_setport(&peer, port); + isc_sockaddr_anyofpf(&local, res->ai_family); + + freeaddrinfo(res); + + result = isc_nm_tlsconnect(mgr, (isc_nmiface_t *)&local, + (isc_nmiface_t *)&peer, connect_cb, session, + ctx, 0); + + if (result != ISC_R_SUCCESS) { + return (result); + } + + return (ISC_R_SUCCESS); +} diff --git a/lib/isc/netmgr/url-parser/url_parser.c b/lib/isc/netmgr/url-parser/url_parser.c new file mode 100644 index 0000000000..92b8cf94c5 --- /dev/null +++ b/lib/isc/netmgr/url-parser/url_parser.c @@ -0,0 +1,652 @@ +/* Copyright Joyent, Inc. and other Node contributors. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "url_parser.h" +#include +#include +#include +#include +#include + +#ifndef BIT_AT +# define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#endif + +#if HTTP_PARSER_STRICT +# define T(v) 0 +#else +# define T(v) v +#endif + +static const uint8_t normal_url_char[32] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, }; + +#undef T + +enum state + { s_dead = 1 /* important that this is > 0 */ + + , s_start_req_or_res + , s_res_or_resp_H + , s_start_res + , s_res_H + , s_res_HT + , s_res_HTT + , s_res_HTTP + , s_res_http_major + , s_res_http_dot + , s_res_http_minor + , s_res_http_end + , s_res_first_status_code + , s_res_status_code + , s_res_status_start + , s_res_status + , s_res_line_almost_done + + , s_start_req + + , s_req_method + , s_req_spaces_before_url + , s_req_schema + , s_req_schema_slash + , s_req_schema_slash_slash + , s_req_server_start + , s_req_server + , s_req_server_with_at + , s_req_path + , s_req_query_string_start + , s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_req_http_start + , s_req_http_H + , s_req_http_HT + , s_req_http_HTT + , s_req_http_HTTP + , s_req_http_I + , s_req_http_IC + , s_req_http_major + , s_req_http_dot + , s_req_http_minor + , s_req_http_end + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_discard_ws + , s_header_value_discard_ws_almost_done + , s_header_value_discard_lws + , s_header_value_start + , s_header_value + , s_header_value_lws + + , s_header_almost_done + + , s_chunk_size_start + , s_chunk_size + , s_chunk_parameters + , 
s_chunk_size_almost_done + + , s_headers_almost_done + , s_headers_done + + /* Important: 's_headers_done' must be the last 'header' state. All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. + */ + + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done + + , s_body_identity + , s_body_identity_eof + + , s_message_done + }; + +enum http_host_state + { + s_http_host_dead = 1 + , s_http_userinfo_start + , s_http_userinfo + , s_http_host_start + , s_http_host_v6_start + , s_http_host + , s_http_host_v6 + , s_http_host_v6_end + , s_http_host_v6_zone_start + , s_http_host_v6_zone + , s_http_host_port_start + , s_http_host_port +}; + +/* Macros for character classes; depends on strict-mode */ +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) +#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') +#define IS_NUM(c) ((c) >= '0' && (c) <= '9') +#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) +#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) +#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \ + (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \ + (c) == ')') +#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ + (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ + (c) == '$' || (c) == ',') + +#define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c]) + +#if HTTP_PARSER_STRICT +#define TOKEN(c) STRICT_TOKEN(c) +#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) +#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') +#else +#define TOKEN(c) tokens[(unsigned char)c] +#define IS_URL_CHAR(c) \ + (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) +#define IS_HOST_CHAR(c) \ + (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') +#endif + +/* Our URL parser. 
+ * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + */ +static enum state +parse_url_char(enum state s, const char ch) +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { + return s_dead; + } + +#if HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * All methods except CONNECT are followed by '/' or '*'. + */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_server_start; + } + + break; + + case s_req_server_with_at: + if (ch == '@') { + return s_dead; + } + + /* fall through */ + case s_req_server_start: + case s_req_server: + if (ch == '/') { + return s_req_path; + } + + if (ch == '?') { + return s_req_query_string_start; + } + + if (ch == '@') { + return s_req_server_with_at; + } + + if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { + return s_req_server; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case 
s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error */ + return s_dead; +} + +static enum http_host_state +http_parse_host_char(enum http_host_state s, const char ch) { + switch(s) { + case s_http_userinfo: + case s_http_userinfo_start: + if (ch == '@') { + return s_http_host_start; + } + + if (IS_USERINFO_CHAR(ch)) { + return s_http_userinfo; + } + break; + + case s_http_host_start: + if (ch == '[') { + return s_http_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + break; + + case s_http_host: + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + /* fall through */ + case s_http_host_v6_end: + if (ch == ':') { + return s_http_host_port_start; + } + + break; + + case s_http_host_v6: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_start: + if (IS_HEX(ch) || ch == ':' || ch == '.') { + return s_http_host_v6; + } + + if (s == s_http_host_v6 && ch == '%') { + return s_http_host_v6_zone_start; + } + break; + + case s_http_host_v6_zone: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_zone_start: + /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ + if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' 
|| ch == '-' || ch == '_' || + ch == '~') { + return s_http_host_v6_zone; + } + break; + + case s_http_host_port: + case s_http_host_port_start: + if (IS_NUM(ch)) { + return s_http_host_port; + } + + break; + + default: + break; + } + return s_http_host_dead; +} + +static int +http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { + enum http_host_state s; + + const char *p; + size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + + assert(u->field_set & (1 << UF_HOST)); + + u->field_data[UF_HOST].len = 0; + + s = found_at ? s_http_userinfo_start : s_http_host_start; + + for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { + enum http_host_state new_s = http_parse_host_char(s, *p); + + if (new_s == s_http_host_dead) { + return 1; + } + + switch(new_s) { + case s_http_host: + if (s != s_http_host) { + u->field_data[UF_HOST].off = (uint16_t)(p - buf); + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6: + if (s != s_http_host_v6) { + u->field_data[UF_HOST].off = (uint16_t)(p - buf); + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + u->field_data[UF_HOST].len++; + break; + + case s_http_host_port: + if (s != s_http_host_port) { + u->field_data[UF_PORT].off = (uint16_t)(p - buf); + u->field_data[UF_PORT].len = 0; + u->field_set |= (1 << UF_PORT); + } + u->field_data[UF_PORT].len++; + break; + + case s_http_userinfo: + if (s != s_http_userinfo) { + u->field_data[UF_USERINFO].off = (uint16_t)(p - buf); + u->field_data[UF_USERINFO].len = 0; + u->field_set |= (1 << UF_USERINFO); + } + u->field_data[UF_USERINFO].len++; + break; + + default: + break; + } + s = new_s; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case s_http_host_start: + case s_http_host_v6_start: + case s_http_host_v6: + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + case s_http_host_port_start: + case s_http_userinfo: + case 
s_http_userinfo_start: + return 1; + default: + break; + } + + return 0; +} + +void +http_parser_url_init(struct http_parser_url *u) { + memset(u, 0, sizeof(*u)); +} + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + int found_at = 0; + + if (buflen == 0) { + return 1; + } + + u->port = u->field_set = 0; + s = is_connect ? s_req_server_start : s_req_spaces_before_url; + old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimeters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_server_with_at: + found_at = 1; + + /* fall through */ + case s_req_server: + uf = UF_HOST; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = (uint16_t)(p - buf); + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* host must be present if there is a schema */ + /* parsing http:///toto will fail */ + if ((u->field_set & (1 << UF_SCHEMA)) && + (u->field_set & (1 << UF_HOST)) == 0) { + return 1; + } + + if (u->field_set & (1 << UF_HOST)) { + if (http_parse_host(buf, u, found_at) != 0) { + return 1; + } + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + if (u->field_set & (1 << UF_PORT)) { + 
uint16_t off; + uint16_t len; + const char* pp; + const char* end; + unsigned long v; + + off = u->field_data[UF_PORT].off; + len = u->field_data[UF_PORT].len; + end = buf + off + len; + + /* NOTE: The characters are already validated and are in the [0-9] range */ + assert(off + len <= buflen && "Port number overflow"); + v = 0; + for (pp = buf + off; pp < end; p++) { + v *= 10; + v += *pp - '0'; + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + } + + u->port = (uint16_t) v; + } + + return 0; +} diff --git a/lib/isc/netmgr/url-parser/url_parser.h b/lib/isc/netmgr/url-parser/url_parser.h new file mode 100644 index 0000000000..78b3096c53 --- /dev/null +++ b/lib/isc/netmgr/url-parser/url_parser.h @@ -0,0 +1,94 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
/* end of license header */
#ifndef url_parser_h
#define url_parser_h
#ifdef __cplusplus
extern "C" {
#endif

/* Also update SONAME in the Makefile whenever you change these. */
#define HTTP_PARSER_VERSION_MAJOR 2
#define HTTP_PARSER_VERSION_MINOR 9
#define HTTP_PARSER_VERSION_PATCH 1

/*
 * NOTE(review): the targets of the three #include directives below were lost
 * when this text was extracted; they are restored from the upstream
 * http_parser.h this header derives from.
 */
#include <stddef.h>
#if defined(_WIN32) && !defined(__MINGW32__) && \
  (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__)
#include <BaseTsd.h>
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif

/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run
 * faster
 */
#ifndef HTTP_PARSER_STRICT
# define HTTP_PARSER_STRICT 1
#endif

/* URL components; each value is a bit position in field_set and an index
 * into field_data[].
 */
enum http_parser_url_fields
  { UF_SCHEMA           = 0
  , UF_HOST             = 1
  , UF_PORT             = 2
  , UF_PATH             = 3
  , UF_QUERY            = 4
  , UF_FRAGMENT         = 5
  , UF_USERINFO         = 6
  , UF_MAX              = 7
  };


/* Result structure for http_parser_parse_url().
 *
 * Callers should index into field_data[] with UF_* values iff field_set
 * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
 * because we probably have padding left over), we convert any port to
 * a uint16_t.
 */
struct http_parser_url {
  uint16_t field_set;           /* Bitmask of (1 << UF_*) values */
  uint16_t port;                /* Converted UF_PORT string */

  struct {
    uint16_t off;               /* Offset into buffer in which field starts */
    uint16_t len;               /* Length of run in buffer */
  } field_data[UF_MAX];
};

/* Initialize all http_parser_url members to 0 */
void http_parser_url_init(struct http_parser_url *u);

/* Parse a URL; return nonzero on failure */
int http_parser_parse_url(const char *buf, size_t buflen,
                          int is_connect,
                          struct http_parser_url *u);
#ifdef __cplusplus
}
#endif
#endif