nghttp2-based HTTP client with DoH support

This commit is contained in:
Witold Kręcicki
2020-08-06 11:02:48 +02:00
committed by Evan Hunt
parent 213376b8fa
commit bc3747f56b
4 changed files with 1347 additions and 0 deletions

View File

@@ -123,6 +123,7 @@ libisc_la_SOURCES = \
$(libisc_la_HEADERS) \
$(pk11_HEADERS) \
$(pkcs11_HEADERS) \
netmgr/http.c \
netmgr/netmgr-int.h \
netmgr/netmgr.c \
netmgr/tcp.c \
@@ -132,6 +133,7 @@ libisc_la_SOURCES = \
netmgr/uv-compat.c \
netmgr/uv-compat.h \
netmgr/uverr2result.c \
netmgr/url-parser/url_parser.c \
unix/pk11_api.c \
unix/dir.c \
unix/errno.c \

599
lib/isc/netmgr/http.c Normal file
View File

@@ -0,0 +1,599 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#include <nghttp2/nghttp2.h>
#include <signal.h>
#include <string.h>
#include <openssl/conf.h>
#include <openssl/err.h>
#include <openssl/ssl.h>
#include <isc/base64.h>
#include <isc/netmgr.h>
#include "netmgr-int.h"
#include "url-parser/url_parser.h"
#define AUTHEXTRA 7
/*
 * State of the single HTTP/2 request carried by a session: the target URI,
 * the pseudo-header values derived from it and the POST body being sent.
 */
typedef struct {
/* Private copy of the request URI, made with isc_mem_strdup(). */
char *uri;
/* Result of running http_parser_parse_url() over 'uri'. */
struct http_parser_url u;
/*
 * ":authority" value: the host bytes of the URI, followed by ":<port>"
 * when the URI carries an explicit port. 'authoritylen' is the length
 * of that text.
 */
char *authority;
size_t authoritylen;
/*
 * ":path" value: the URI path ("/" when the URI has none), followed by
 * "?<query>" when a query is present. The buffer is exactly 'pathlen'
 * bytes long and is not NUL-terminated.
 */
char *path;
size_t pathlen;
/* -1 until nghttp2_submit_request() has assigned a stream id. */
int32_t stream_id;
/*
 * Request body. The pointer is stored as supplied by the caller of
 * isc_nm_doh_request(); the data is not copied.
 */
isc_region_t *postdata;
/* Number of bytes of 'postdata' already handed to nghttp2. */
size_t postdata_pos;
} http2_stream;
/*
 * One client connection: the nghttp2 session, the network handle it runs
 * over, the single request stream and the buffers used while exchanging
 * data with the peer.
 */
typedef struct {
/* Memory context attached at creation and detached on deletion. */
isc_mem_t *mctx;
/* nghttp2 client session; NULL until the connection is established. */
nghttp2_session *ngsession;
/* The one request this session carries. */
http2_stream *stream;
/* Network handle; a reference is taken in connect_cb(). */
isc_nmhandle_t *handle;
/*
 * Input received from the network that nghttp2 did not consume yet;
 * http2_do_bio() feeds it back to nghttp2 later.
 */
uint8_t buf[65535];
size_t bufsize;
/*
 * Response body: DATA payload of the request stream, accumulated by
 * on_data_chunk_recv_callback() and passed to 'cb' when the stream closes.
 */
uint8_t rbuf[65535];
size_t rbufsize;
/* User callback and its argument, as given to isc_nm_doh_request(). */
isc_nm_recv_cb_t cb;
void *cbarg;
/* TLS context passed to isc_nm_tlsconnect(). */
SSL_CTX *ctx;
/* True once isc_nm_read() has been called on 'handle'. */
bool reading;
} http2_session;
/*
 * Forward declarations: http2_do_bio() and writecb() refer to each other,
 * and several functions defined earlier in the file call http2_do_bio().
 */
static bool
http2_do_bio(http2_session *session);
static void
writecb(isc_nmhandle_t *handle, isc_result_t result, void *ptr);
/*
 * Build an http2_stream describing a request to 'uri'.
 *
 * The URI is copied and parsed; the ":authority" and ":path" pseudo-header
 * values are derived from it and stored in the new stream.
 *
 * 'mctx'    memory context used for every allocation made here.
 * 'streamp' receives the new stream on success; must point to NULL.
 * 'uri'     absolute URI; it must contain both a scheme and a host.
 * 'port'    receives the port from the URI, or 443 when none is given.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_FAILURE when the URI cannot be parsed or
 * lacks a scheme or host. On failure nothing is left allocated and
 * '*streamp' is untouched.
 *
 * NOTE(review): for an IPv6 literal the parser's UF_HOST field excludes the
 * surrounding brackets, so the authority built here lacks them as well.
 */
static isc_result_t
get_http2_stream(isc_mem_t *mctx, http2_stream **streamp, const char *uri,
		 uint16_t *port) {
	INSIST(streamp != NULL && *streamp == NULL);
	INSIST(uri != NULL);
	INSIST(port != NULL);

	const uint16_t required = (1 << UF_SCHEMA) | (1 << UF_HOST);
	char portbuf[AUTHEXTRA] = "";
	size_t portlen = 0;
	size_t hostlen = 0;
	size_t pathonly = 1;
	size_t querylen = 0;
	bool has_port = false;
	bool has_path = false;
	bool has_query = false;
	int rv;

	http2_stream *stream = isc_mem_get(mctx, sizeof(http2_stream));
	/* Start from a known state; the parser only resets part of 'u'. */
	*stream = (http2_stream){ .stream_id = -1 };
	stream->uri = isc_mem_strdup(mctx, uri);

	rv = http_parser_parse_url(stream->uri, strlen(stream->uri), 0,
				   &stream->u);
	if (rv != 0 || (stream->u.field_set & required) != required) {
		/*
		 * Release the string before the structure that holds the
		 * pointer to it.
		 */
		isc_mem_free(mctx, stream->uri);
		isc_mem_put(mctx, stream, sizeof(http2_stream));
		return (ISC_R_FAILURE);
	}

	has_port = (stream->u.field_set & (1 << UF_PORT)) != 0;
	has_path = (stream->u.field_set & (1 << UF_PATH)) != 0;
	has_query = (stream->u.field_set & (1 << UF_QUERY)) != 0;

	/*
	 * Authority is "host" or "host:port". The port text is formatted
	 * first so that the final length is known before allocating; the
	 * buffer is always released as 'authoritylen + AUTHEXTRA' bytes.
	 */
	hostlen = stream->u.field_data[UF_HOST].len;
	if (has_port) {
		rv = snprintf(portbuf, sizeof(portbuf), ":%u",
			      (unsigned int)stream->u.port);
		INSIST(rv > 0 && (size_t)rv < sizeof(portbuf));
		portlen = (size_t)rv;
	}
	stream->authoritylen = hostlen + portlen;
	stream->authority = isc_mem_get(mctx,
					stream->authoritylen + AUTHEXTRA);
	memmove(stream->authority,
		&stream->uri[stream->u.field_data[UF_HOST].off], hostlen);
	memmove(stream->authority + hostlen, portbuf, portlen);
	stream->authority[stream->authoritylen] = '\0';

	/* If we don't have path in URI, we use "/" as path. */
	if (has_path) {
		pathonly = stream->u.field_data[UF_PATH].len;
	}
	stream->pathlen = pathonly;
	if (has_query) {
		/* +1 for '?' character */
		querylen = stream->u.field_data[UF_QUERY].len;
		stream->pathlen += querylen + 1;
	}

	stream->path = isc_mem_get(mctx, stream->pathlen);
	if (has_path) {
		memmove(stream->path,
			&stream->uri[stream->u.field_data[UF_PATH].off],
			pathonly);
	} else {
		stream->path[0] = '/';
	}
	if (has_query) {
		stream->path[pathonly] = '?';
		memmove(stream->path + pathonly + 1,
			&stream->uri[stream->u.field_data[UF_QUERY].off],
			querylen);
	}

	*port = has_port ? stream->u.port : 443;

	*streamp = stream;
	return (ISC_R_SUCCESS);
}
/*
 * Release a stream created by get_http2_stream(), including the private
 * copy of the URI, which was previously leaked.
 *
 * 'mctx' must be the memory context the stream was allocated from.
 *
 * NOTE(review): the size given for 'authority' must equal the size it was
 * allocated with in get_http2_stream(); verify that the two agree when the
 * URI carries an explicit port.
 */
static void
put_http2_stream(isc_mem_t *mctx, http2_stream *stream) {
	isc_mem_put(mctx, stream->path, stream->pathlen);
	isc_mem_put(mctx, stream->authority, stream->authoritylen + AUTHEXTRA);
	if (stream->uri != NULL) {
		/* Allocated with isc_mem_strdup(), so freed with isc_mem_free(). */
		isc_mem_free(mctx, stream->uri);
	}
	isc_mem_put(mctx, stream, sizeof(http2_stream));
}
/*
 * Tear down a session: drop the handle reference, destroy the nghttp2
 * session, release the request stream, then free the session itself and
 * detach from its memory context. Members that were never set up (NULL)
 * are skipped, so this can be called at any stage of setup.
 */
static void
delete_http2_session(http2_session *session) {
	isc_nmhandle_t *handle = session->handle;
	nghttp2_session *ngsession = session->ngsession;
	http2_stream *stream = session->stream;

	if (handle != NULL) {
		isc_nmhandle_unref(handle);
		session->handle = NULL;
	}

	if (ngsession != NULL) {
		nghttp2_session_del(ngsession);
		session->ngsession = NULL;
	}

	if (stream != NULL) {
		put_http2_stream(session->mctx, stream);
		session->stream = NULL;
	}

	isc_mem_putanddetach(&session->mctx, session, sizeof(http2_session));
}
/*
 * Disabled code: header and frame callbacks that are not registered with
 * nghttp2 at present. As written, on_header_callback() and
 * on_frame_recv_callback() have no return type or storage class, so they
 * need a "static int" in front before this section can be enabled.
 */
#if 0
/* XXXWPK do we need these callbacks? We might want to verify headers */
on_header_callback(nghttp2_session *ngsession, const nghttp2_frame *frame,
const uint8_t *name, size_t namelen, const uint8_t *value,
size_t valuelen, uint8_t flags, void *user_data) {
http2_session *session = (http2_session *)user_data;
UNUSED(ngsession);
UNUSED(flags);
switch (frame->hd.type) {
case NGHTTP2_HEADERS:
if (frame->headers.cat == NGHTTP2_HCAT_RESPONSE &&
session->stream->stream_id == frame->hd.stream_id)
{
break;
}
}
return (0);
}
static int
on_begin_headers_callback(nghttp2_session *ngsession, const nghttp2_frame *frame,
void *user_data) {
http2_session *session = (http2_session *)user_data;
UNUSED(ngsession);
switch (frame->hd.type) {
case NGHTTP2_HEADERS:
if (frame->headers.cat == NGHTTP2_HCAT_RESPONSE &&
session->stream->stream_id == frame->hd.stream_id)
{
/* XXX */
}
break;
}
return (0);
}
on_frame_recv_callback(nghttp2_session *ngsession, const nghttp2_frame *frame,
void *user_data) {
http2_session *session = (http2_session *)user_data;
UNUSED(ngsession);
switch (frame->hd.type) {
case NGHTTP2_HEADERS:
if (frame->headers.cat == NGHTTP2_HCAT_RESPONSE &&
session->stream->stream_id == frame->hd.stream_id)
{
/* XXX */
}
break;
}
return (0);
}
#endif
/*
 * nghttp2 callback: a chunk of DATA payload arrived.
 *
 * Payload belonging to the request stream is appended to the session's
 * response buffer; payload for any other stream is ignored.
 *
 * Returns 0 on success. When the chunk does not fit into what is left of
 * the response buffer, NGHTTP2_ERR_CALLBACK_FAILURE is returned instead of
 * writing past the end of the buffer; nghttp2 treats that as fatal for the
 * session.
 */
static int
on_data_chunk_recv_callback(nghttp2_session *ngsession, uint8_t flags,
			    int32_t stream_id, const uint8_t *data, size_t len,
			    void *user_data) {
	http2_session *session = (http2_session *)user_data;

	UNUSED(ngsession);
	UNUSED(flags);

	if (session->stream->stream_id != stream_id) {
		return (0);
	}

	/* rbufsize never exceeds sizeof(rbuf), so this cannot wrap. */
	if (len > sizeof(session->rbuf) - session->rbufsize) {
		return (NGHTTP2_ERR_CALLBACK_FAILURE);
	}

	memmove(session->rbuf + session->rbufsize, data, len);
	session->rbufsize += len;

	return (0);
}
/*
 * nghttp2 callback: a stream was closed.
 *
 * When the closed stream is the request stream, the session is asked to
 * terminate and the accumulated response body is handed to the user
 * callback. The closing of any other stream is ignored; previously the
 * user callback fired for every closed stream, whether or not it was the
 * one carrying the request.
 *
 * Returns 0, or NGHTTP2_ERR_CALLBACK_FAILURE when the session cannot be
 * told to terminate (the user callback is not invoked in that case).
 *
 * NOTE(review): 'error_code' is not inspected, so a stream reset by the
 * peer is reported to the user with ISC_R_SUCCESS as well.
 */
static int
on_stream_close_callback(nghttp2_session *ngsession, int32_t stream_id,
			 uint32_t error_code, void *user_data) {
	UNUSED(error_code);
	http2_session *session = (http2_session *)user_data;
	int rv;

	if (session->stream->stream_id != stream_id) {
		return (0);
	}

	rv = nghttp2_session_terminate_session(ngsession, NGHTTP2_NO_ERROR);
	if (rv != 0) {
		return (NGHTTP2_ERR_CALLBACK_FAILURE);
	}

	session->cb(NULL, ISC_R_SUCCESS,
		    &(isc_region_t){ session->rbuf, session->rbufsize },
		    session->cbarg);
	/* XXXWPK TODO we need to close the session */
	return (0);
}
#ifndef OPENSSL_NO_NEXTPROTONEG
/*
 * NPN TLS extension client callback. Asks nghttp2 to pick, from the
 * protocols advertised by the server ('in'/'inlen'), an HTTP/2 protocol
 * the library supports and stores the choice in 'out'/'outlen'.
 *
 * A failed selection is not acted upon yet (see the TODO below); the
 * callback reports SSL_TLSEXT_ERR_OK in every case.
 */
static int
select_next_proto_cb(SSL *ssl, unsigned char **out, unsigned char *outlen,
		     const unsigned char *in, unsigned int inlen, void *arg) {
	int selected;

	UNUSED(ssl);
	UNUSED(arg);

	selected = nghttp2_select_next_protocol(out, outlen, in, inlen);
	if (selected <= 0) {
		/* TODO */
	}

	return (SSL_TLSEXT_ERR_OK);
}
#endif /* !OPENSSL_NO_NEXTPROTONEG */
/*
 * Create the client-side SSL_CTX used when the caller does not supply one.
 *
 * SSLv2, SSLv3, compression and session resumption on renegotiation are
 * switched off. "h2" is offered through NPN and/or ALPN, depending on what
 * the OpenSSL build provides. The process is aborted when the context
 * cannot be created.
 */
static SSL_CTX *
create_ssl_ctx(void) {
	SSL_CTX *ssl_ctx = SSL_CTX_new(SSLv23_client_method());

	if (ssl_ctx == NULL) {
		/* TODO */
		abort();
	}

	SSL_CTX_set_options(
		ssl_ctx, SSL_OP_ALL | SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 |
				 SSL_OP_NO_COMPRESSION |
				 SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION);

#ifndef OPENSSL_NO_NEXTPROTONEG
	SSL_CTX_set_next_proto_select_cb(ssl_ctx, select_next_proto_cb, NULL);
#endif /* !OPENSSL_NO_NEXTPROTONEG */

#if OPENSSL_VERSION_NUMBER >= 0x10002000L
	/* Length-prefixed protocol list: one entry, "h2". */
	SSL_CTX_set_alpn_protos(ssl_ctx, (const unsigned char *)"\x02h2", 3);
#endif /* OPENSSL_VERSION_NUMBER >= 0x10002000L */

	return (ssl_ctx);
}
/*
 * Create the nghttp2 client session for 'session' and register the
 * callbacks this file implements (DATA chunk received, stream closed).
 * The session object itself is passed to nghttp2 as the callbacks'
 * user data.
 *
 * The function has no way to report an error, and continuing after a
 * failed allocation would mean working with an unset callbacks object or
 * a NULL nghttp2 session; as elsewhere in this file, the process is
 * aborted in that case.
 */
static void
initialize_nghttp2_session(http2_session *session) {
	nghttp2_session_callbacks *callbacks = NULL;
	int rv;

	rv = nghttp2_session_callbacks_new(&callbacks);
	if (rv != 0) {
		abort();
	}

	nghttp2_session_callbacks_set_on_data_chunk_recv_callback(
		callbacks, on_data_chunk_recv_callback);
	nghttp2_session_callbacks_set_on_stream_close_callback(
		callbacks, on_stream_close_callback);
#if 0
	/* Do we need it ? */
	nghttp2_session_callbacks_set_on_header_callback(callbacks,
							 on_header_callback);
	nghttp2_session_callbacks_set_on_begin_headers_callback(
		callbacks, on_begin_headers_callback);
	nghttp2_session_callbacks_set_on_frame_recv_callback(
		callbacks, on_frame_recv_callback);
#endif

	rv = nghttp2_session_client_new(&session->ngsession, callbacks,
					session);
	/* The callbacks object is no longer needed once the session exists. */
	nghttp2_session_callbacks_del(callbacks);
	if (rv != 0) {
		abort();
	}
}
/*
 * Queue the initial SETTINGS frame (maximum of 100 concurrent streams)
 * and run the I/O pump so that it gets written out. A failure to queue
 * the frame is not handled yet.
 */
static void
send_client_connection_header(http2_session *session) {
	nghttp2_settings_entry settings[] = {
		{ NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 100 }
	};
	const size_t nsettings = sizeof(settings) / sizeof(settings[0]);

	if (nghttp2_submit_settings(session->ngsession, NGHTTP2_FLAG_NONE,
				    settings, nsettings) != 0)
	{
		/* TODO */
	}

	(void)http2_do_bio(session);
}
/*
 * Initializer for an nghttp2_nv header entry. NAME must be a string
 * literal (or array), because its length is taken with sizeof; VALUE is
 * any byte pointer and VALUELEN its length in bytes.
 */
#define MAKE_NV(NAME, VALUE, VALUELEN) \
{ \
(uint8_t *)NAME, (uint8_t *)VALUE, sizeof(NAME) - 1, VALUELEN, \
NGHTTP2_NV_FLAG_NONE \
}
/*
 * Same as MAKE_NV, for the case where VALUE is a string literal (or
 * array) too, so that its length can be taken with sizeof as well.
 */
#define MAKE_NV2(NAME, VALUE) \
{ \
(uint8_t *)NAME, (uint8_t *)VALUE, sizeof(NAME) - 1, \
sizeof(VALUE) - 1, NGHTTP2_NV_FLAG_NONE \
}
/*
 * nghttp2 data-provider callback: supply the next piece of the POST body.
 *
 * Copies at most 'length' bytes of the not-yet-sent request body into
 * 'buf', advances the send position and sets NGHTTP2_DATA_FLAG_EOF once
 * the whole body has been handed over.
 *
 * Returns the number of bytes copied; 0 when 'stream_id' is not the
 * request stream.
 */
static ssize_t
post_read_callback(nghttp2_session *ngsession, int32_t stream_id, uint8_t *buf,
		   size_t length, uint32_t *data_flags,
		   nghttp2_data_source *source, void *user_data) {
	http2_session *session = (http2_session *)user_data;
	size_t remaining, chunk;

	UNUSED(ngsession);
	UNUSED(source);

	if (session->stream->stream_id != stream_id) {
		return (0);
	}

	remaining = session->stream->postdata->length -
		    session->stream->postdata_pos;
	chunk = (remaining > length) ? length : remaining;

	memcpy(buf,
	       session->stream->postdata->base + session->stream->postdata_pos,
	       chunk);
	session->stream->postdata_pos += chunk;

	if (session->stream->postdata_pos == session->stream->postdata->length)
	{
		*data_flags |= NGHTTP2_DATA_FLAG_EOF;
	}

	return (chunk);
}
/*
 * Submit the POST request for the session's stream to nghttp2 and run the
 * I/O pump.
 *
 * The request carries the :method, :scheme, :authority and :path
 * pseudo-headers plus content-type, accept and content-length; the body
 * is supplied through post_read_callback().
 *
 * Returns ISC_R_SUCCESS, or ISC_R_FAILURE when nghttp2 rejects the request.
 */
static isc_result_t
submit_request(http2_session *session) {
	http2_stream *stream = session->stream;
	struct http_parser_url *url = &stream->u;
	char *scheme = &stream->uri[url->field_data[UF_SCHEMA].off];
	char clen[64];
	int32_t id;

	snprintf(clen, sizeof(clen), "%u", stream->postdata->length);

	nghttp2_nv hdrs[] = {
		MAKE_NV2(":method", "POST"),
		MAKE_NV(":scheme", scheme, url->field_data[UF_SCHEMA].len),
		MAKE_NV(":authority", stream->authority, stream->authoritylen),
		MAKE_NV(":path", stream->path, stream->pathlen),
		MAKE_NV2("content-type", "application/dns-message"),
		MAKE_NV2("accept", "application/dns-message"),
		MAKE_NV("content-length", clen, strlen(clen))
	};
	nghttp2_data_provider dp = { .read_callback = post_read_callback };

	id = nghttp2_submit_request(session->ngsession, NULL, hdrs,
				    sizeof(hdrs) / sizeof(hdrs[0]), &dp,
				    stream);
	if (id < 0) {
		return (ISC_R_FAILURE);
	}

	/* Remember the id so that the callbacks can recognise our stream. */
	stream->stream_id = id;

	(void)http2_do_bio(session);

	return (ISC_R_SUCCESS);
}
/*
 * Read callback from the TLS socket.
 *
 * Feeds the received bytes to nghttp2. Bytes nghttp2 did not consume are
 * stashed in the session buffer and reading is paused until
 * http2_do_bio() has replayed them. Afterwards the I/O pump is run.
 *
 * 'result' is now checked: when the read failed (or no region was
 * supplied) the session is deleted instead of dereferencing 'region'.
 * The stash now holds the unconsumed tail of the region rather than its
 * already-processed head.
 *
 * NOTE(review): as in the nghttp2 error path, the user callback is not
 * informed when the session is deleted here.
 */
static void
readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region,
       void *data) {
	http2_session *session = (http2_session *)data;
	ssize_t readlen;
	size_t consumed;

	UNUSED(handle);

	if (result != ISC_R_SUCCESS || region == NULL) {
		delete_http2_session(session);
		/* TODO callback! */
		return;
	}

	readlen = nghttp2_session_mem_recv(session->ngsession, region->base,
					   region->length);
	if (readlen < 0) {
		delete_http2_session(session);
		/* TODO callback! */
		return;
	}

	consumed = (size_t)readlen;
	if (consumed < region->length) {
		size_t leftover = region->length - consumed;

		INSIST(session->bufsize == 0);
		INSIST(leftover <= sizeof(session->buf));
		memmove(session->buf, region->base + consumed, leftover);
		session->bufsize = leftover;
		isc_nm_pauseread(session->handle);
	}

	/* We might have something to receive or send, do IO */
	http2_do_bio(session);
}
/*
 * I/O pump for a session.
 *
 * - When nghttp2 wants neither to read nor to write, the session is
 *   finished and gets deleted.
 * - When nghttp2 wants to read: start reading on the handle if that has
 *   not happened yet, else replay stashed input, else resume reading.
 *   When it does not want to read, reading is paused.
 * - When nghttp2 wants to write: fetch the next chunk of serialized
 *   frames and queue it for sending; writecb() runs the pump again.
 *
 * Returns true when a write was queued, false otherwise. The session may
 * have been deleted when false is returned, so callers must not touch it
 * afterwards without knowing better.
 *
 * Errors reported by nghttp2 (negative return values) delete the session
 * instead of being used as buffer sizes.
 *
 * NOTE(review): the buffer allocated for an outgoing chunk is not released
 * anywhere in this file; confirm whether isc_nm_send() copies the data or
 * takes ownership, and free it at the appropriate point.
 */
static bool
http2_do_bio(http2_session *session) {
	if (nghttp2_session_want_read(session->ngsession) == 0 &&
	    nghttp2_session_want_write(session->ngsession) == 0)
	{
		delete_http2_session(session);
		return (false);
	}

	if (nghttp2_session_want_read(session->ngsession) != 0) {
		if (!session->reading) {
			/* We have not yet started reading from this handle */
			isc_nm_read(session->handle, readcb, session);
			session->reading = true;
		} else if (session->bufsize > 0) {
			/* Leftover data in the buffer, use it */
			ssize_t readlen = nghttp2_session_mem_recv(
				session->ngsession, session->buf,
				session->bufsize);
			size_t consumed;

			if (readlen < 0) {
				delete_http2_session(session);
				return (false);
			}

			consumed = (size_t)readlen;
			if (consumed >= session->bufsize) {
				session->bufsize = 0;
			} else {
				memmove(session->buf, session->buf + consumed,
					session->bufsize - consumed);
				session->bufsize -= consumed;
			}

			http2_do_bio(session);
			return (false);
		} else {
			/* Resume reading, it's idempotent, wait for more */
			isc_nm_resumeread(session->handle);
		}
	} else {
		/* We don't want more data, stop reading for now */
		isc_nm_pauseread(session->handle);
	}

	if (nghttp2_session_want_write(session->ngsession) != 0) {
		const uint8_t *data = NULL;
		isc_region_t region;
		isc_result_t result;
		ssize_t sz;

		/*
		 * XXXWPK TODO
		 * This function may produce very small byte string. If that
		 * is the case, and application disables Nagle algorithm
		 * (``TCP_NODELAY``), then writing this small chunk leads to
		 * very small packet, and it is very inefficient. An
		 * application should be responsible to buffer up small chunks
		 * of data as necessary to avoid this situation.
		 */
		sz = nghttp2_session_mem_send(session->ngsession, &data);
		if (sz < 0) {
			delete_http2_session(session);
			return (false);
		}
		if (sz == 0) {
			/* Nothing can be sent right now. */
			return (false);
		}

		/*
		 * 'data' is only valid until the next call into nghttp2,
		 * so the chunk is copied before it is queued.
		 */
		region.base = malloc((size_t)sz);
		if (region.base == NULL) {
			abort();
		}
		region.length = (unsigned int)sz;
		memcpy(region.base, data, (size_t)sz);

		result = isc_nm_send(session->handle, &region, writecb,
				     session);
		if (result != ISC_R_SUCCESS) {
			abort();
		}
		return (true);
	}

	return (false);
}
/*
 * Send-completion callback: whatever the outcome of the write, run the
 * I/O pump again for the session passed as 'ptr'.
 */
static void
writecb(isc_nmhandle_t *handle, isc_result_t result, void *ptr) {
	UNUSED(handle);
	UNUSED(result);

	(void)http2_do_bio((http2_session *)ptr);
}
/*
 * Connect callback given to isc_nm_tlsconnect(); 'arg' is the
 * http2_session created by isc_nm_doh_request().
 *
 * On a failed connect the session is deleted. Otherwise the handle is
 * stored in the session, the nghttp2 session is created, the SETTINGS
 * frame and the request are submitted and the I/O pump is run.
 */
static void
connect_cb(isc_nmhandle_t *handle, isc_result_t result, void *arg) {
http2_session *session = (http2_session *)arg;
if (result != ISC_R_SUCCESS) {
/* The user callback is not invoked on this path. */
delete_http2_session(session);
return;
}
/* Hold a reference; it is dropped in delete_http2_session(). */
session->handle = handle;
isc_nmhandle_ref(handle);
/*
 * Disabled: check that "h2" was negotiated through NPN/ALPN. The code
 * refers to 'ssl', 'alpn' and 'alpnlen', none of which is declared in
 * this function.
 */
#if 0
/* TODO H2 */
#ifndef OPENSSL_NO_NEXTPROTONEG
SSL_get0_next_proto_negotiated(ssl, &alpn, &alpnlen);
#endif
#if OPENSSL_VERSION_NUMBER >= 0x10002000L
if (alpn == NULL) {
SSL_get0_alpn_selected(ssl, &alpn, &alpnlen);
}
#endif
if (alpn == NULL || alpnlen != 2 || memcmp("h2", alpn, 2) != 0)
{
fprintf(stderr, "h2 is not negotiated\n");
delete_http2_session(session);
return;
}
#endif
initialize_nghttp2_session(session);
send_client_connection_header(session);
/* NOTE(review): the result of submit_request() is ignored. */
submit_request(session);
/*
 * NOTE(review): both calls above already run http2_do_bio(), which may
 * delete the session; confirm 'session' is still valid at this point.
 */
http2_do_bio(session);
}
/*
 * Start a DNS-over-HTTPS POST request.
 *
 * 'mgr'     network manager; its memory context is used for the session.
 * 'uri'     absolute URI of the DoH endpoint.
 * 'message' request body; the pointer is stored, not copied, so the
 *           region must stay valid until the body has been sent.
 * 'cb'      called with the response body when the request stream closes.
 * 'cbarg'   passed through to 'cb'.
 * 'ctx'     TLS context to use, or NULL to have one created here.
 *
 * Returns ISC_R_SUCCESS once the TLS connect has been started, the error
 * from parsing the URI or from isc_nm_tlsconnect(), or ISC_R_FAILURE when
 * the host cannot be resolved.
 *
 * The session is released on every failure path before the connect is
 * attempted; previously it was leaked when name resolution failed.
 *
 * NOTE(review): getaddrinfo() blocks the calling thread.
 * NOTE(review): a context created here is never released in this file.
 * NOTE(review): when isc_nm_tlsconnect() fails the session is left alone,
 * because it is not visible here whether connect_cb() (which deletes the
 * session on failure) is also invoked in that case.
 */
isc_result_t
isc_nm_doh_request(isc_nm_t *mgr, const char *uri, isc_region_t *message,
		   isc_nm_recv_cb_t cb, void *cbarg, SSL_CTX *ctx) {
	uint16_t port;
	char *host = NULL;
	http2_session *session = NULL;
	struct addrinfo hints;
	struct addrinfo *res = NULL;
	isc_sockaddr_t local, peer;
	isc_result_t result;
	int gai;

	if (ctx == NULL) {
		ctx = create_ssl_ctx();
	}

	session = isc_mem_get(mgr->mctx, sizeof(http2_session));
	*session = (http2_session){ .cb = cb, .cbarg = cbarg, .ctx = ctx };
	isc_mem_attach(mgr->mctx, &session->mctx);

	result = get_http2_stream(mgr->mctx, &session->stream, uri, &port);
	if (result != ISC_R_SUCCESS) {
		delete_http2_session(session);
		return (result);
	}

	session->stream->postdata = message;
	session->stream->postdata_pos = 0;

	/* TODO do this properly!!! */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags |= AI_CANONNAME;

	/* getaddrinfo() needs a NUL-terminated copy of the host part. */
	host = strndup(
		&session->stream
			 ->uri[session->stream->u.field_data[UF_HOST].off],
		session->stream->u.field_data[UF_HOST].len);
	if (host == NULL) {
		delete_http2_session(session);
		return (ISC_R_FAILURE);
	}

	gai = getaddrinfo(host, NULL, &hints, &res);
	free(host);
	if (gai != 0 || res == NULL) {
		delete_http2_session(session);
		return (ISC_R_FAILURE);
	}

	/* Only the first address returned is used. */
	isc_sockaddr_fromsockaddr(&peer, res->ai_addr);
	isc_sockaddr_setport(&peer, port);
	isc_sockaddr_anyofpf(&local, res->ai_family);
	freeaddrinfo(res);

	result = isc_nm_tlsconnect(mgr, (isc_nmiface_t *)&local,
				   (isc_nmiface_t *)&peer, connect_cb, session,
				   ctx, 0);
	return (result);
}

View File

@@ -0,0 +1,652 @@
/* Copyright Joyent, Inc. and other Node contributors.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "url_parser.h"
#include <assert.h>
#include <stddef.h>
#include <ctype.h>
#include <string.h>
#include <limits.h>
/*
 * BIT_AT(a, i): value (0 or 1) of bit number 'i' in the byte array 'a',
 * where bit i lives in byte i / 8 at position i % 8.
 */
#ifndef BIT_AT
# define BIT_AT(a, i) \
(!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
(1 << ((unsigned int) (i) & 7))))
#endif
/*
 * T(v) marks table entries that are only accepted in non-strict mode:
 * it expands to 0 when HTTP_PARSER_STRICT is non-zero and to v otherwise.
 */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif
/*
 * Bitmap, indexed with BIT_AT(), of the characters accepted inside a URL
 * path, query or fragment. Each row below covers eight consecutive
 * character codes. Only codes 0-127 are listed; the remaining bytes of the
 * array are zero. Control characters, space, '#', '?' and DEL are clear;
 * tab (9) and form feed (12) are set in non-strict mode only.
 */
static const uint8_t normal_url_char[32] = {
/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
#undef T
/*
 * Parser states. The URL functions in this file only use s_dead,
 * s_req_spaces_before_url and the states from s_req_schema through
 * s_req_fragment; the other enumerators are not referenced by them.
 */
enum state
{ s_dead = 1 /* important that this is > 0 */
, s_start_req_or_res
, s_res_or_resp_H
, s_start_res
, s_res_H
, s_res_HT
, s_res_HTT
, s_res_HTTP
, s_res_http_major
, s_res_http_dot
, s_res_http_minor
, s_res_http_end
, s_res_first_status_code
, s_res_status_code
, s_res_status_start
, s_res_status
, s_res_line_almost_done
, s_start_req
, s_req_method
, s_req_spaces_before_url
, s_req_schema
, s_req_schema_slash
, s_req_schema_slash_slash
, s_req_server_start
, s_req_server
, s_req_server_with_at
, s_req_path
, s_req_query_string_start
, s_req_query_string
, s_req_fragment_start
, s_req_fragment
, s_req_http_start
, s_req_http_H
, s_req_http_HT
, s_req_http_HTT
, s_req_http_HTTP
, s_req_http_I
, s_req_http_IC
, s_req_http_major
, s_req_http_dot
, s_req_http_minor
, s_req_http_end
, s_req_line_almost_done
, s_header_field_start
, s_header_field
, s_header_value_discard_ws
, s_header_value_discard_ws_almost_done
, s_header_value_discard_lws
, s_header_value_start
, s_header_value
, s_header_value_lws
, s_header_almost_done
, s_chunk_size_start
, s_chunk_size
, s_chunk_parameters
, s_chunk_size_almost_done
, s_headers_almost_done
, s_headers_done
/* Important: 's_headers_done' must be the last 'header' state. All
* states beyond this must be 'body' states. It is used for overflow
* checking. See the PARSING_HEADER() macro.
*/
, s_chunk_data
, s_chunk_data_almost_done
, s_chunk_data_done
, s_body_identity
, s_body_identity_eof
, s_message_done
};
/*
 * States of the second-pass parser that splits the server part of a URL
 * ("userinfo@host:port") into user info, host (name or bracketed IPv6
 * literal with optional zone id) and port. s_http_host_dead signals an
 * invalid character for the current state.
 */
enum http_host_state
{
s_http_host_dead = 1
, s_http_userinfo_start
, s_http_userinfo
, s_http_host_start
, s_http_host_v6_start
, s_http_host
, s_http_host_v6
, s_http_host_v6_end
, s_http_host_v6_zone_start
, s_http_host_v6_zone
, s_http_host_port_start
, s_http_host_port
};
/* Macros for character classes; depends on strict-mode */
#define CR '\r'
#define LF '\n'
/* ASCII lower-casing of a letter by setting bit 0x20. */
#define LOWER(c) (unsigned char)(c | 0x20)
#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c) ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
(c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
(c) == ')')
/* Characters accepted in the userinfo part (and, loosely, the server part). */
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
(c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
(c) == '$' || (c) == ',')
/*
 * NOTE(review): STRICT_TOKEN and TOKEN refer to a 'tokens' table that is
 * not defined in the visible part of this file; neither macro is used by
 * the URL functions below.
 */
#define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c])
#if HTTP_PARSER_STRICT
#define TOKEN(c) STRICT_TOKEN(c)
#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict mode also accepts bytes >= 0x80 in URLs and '_' in host names. */
#define TOKEN(c) tokens[(unsigned char)c]
#define IS_URL_CHAR(c) \
(BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
#define IS_HOST_CHAR(c) \
(IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
/* Our URL parser.
*
* This is designed to be shared by http_parser_execute() for URL validation,
* hence it has a state transition + byte-for-byte interface. In addition, it
* is meant to be embedded in http_parser_parse_url(), which does the dirty
* work of turning state transitions into URL components for its API.
*
* This function should only be invoked with non-space characters. It is
* assumed that the caller cares about (and can detect) the transition between
* URL and non-URL states by looking for these.
*
* Given the current state 's' and the next input character 'ch', returns the
* next state, or s_dead when 'ch' is not acceptable in state 's'. Space, CR
* and LF are always rejected; tab and form feed are rejected in strict mode.
*/
static enum state
parse_url_char(enum state s, const char ch)
{
if (ch == ' ' || ch == '\r' || ch == '\n') {
return s_dead;
}
#if HTTP_PARSER_STRICT
if (ch == '\t' || ch == '\f') {
return s_dead;
}
#endif
switch (s) {
case s_req_spaces_before_url:
/* Proxied requests are followed by scheme of an absolute URI (alpha).
* All methods except CONNECT are followed by '/' or '*'.
*/
if (ch == '/' || ch == '*') {
return s_req_path;
}
if (IS_ALPHA(ch)) {
return s_req_schema;
}
break;
case s_req_schema:
/* Only letters are accepted in the scheme; ':' ends it. */
if (IS_ALPHA(ch)) {
return s;
}
if (ch == ':') {
return s_req_schema_slash;
}
break;
case s_req_schema_slash:
if (ch == '/') {
return s_req_schema_slash_slash;
}
break;
case s_req_schema_slash_slash:
if (ch == '/') {
return s_req_server_start;
}
break;
case s_req_server_with_at:
/* A second '@' in the server part is an error. */
if (ch == '@') {
return s_dead;
}
/* fall through */
case s_req_server_start:
case s_req_server:
if (ch == '/') {
return s_req_path;
}
if (ch == '?') {
return s_req_query_string_start;
}
if (ch == '@') {
return s_req_server_with_at;
}
/* Loose check only; http_parse_host() validates the server part later. */
if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
return s_req_server;
}
break;
case s_req_path:
if (IS_URL_CHAR(ch)) {
return s;
}
switch (ch) {
case '?':
return s_req_query_string_start;
case '#':
return s_req_fragment_start;
}
break;
case s_req_query_string_start:
case s_req_query_string:
if (IS_URL_CHAR(ch)) {
return s_req_query_string;
}
switch (ch) {
case '?':
/* allow extra '?' in query string */
return s_req_query_string;
case '#':
return s_req_fragment_start;
}
break;
case s_req_fragment_start:
if (IS_URL_CHAR(ch)) {
return s_req_fragment;
}
switch (ch) {
case '?':
return s_req_fragment;
case '#':
return s;
}
break;
case s_req_fragment:
if (IS_URL_CHAR(ch)) {
return s;
}
switch (ch) {
case '?':
case '#':
return s;
}
break;
default:
break;
}
/* We should never fall out of the switch above unless there's an error */
return s_dead;
}
/*
 * One step of the server-part parser: given the current state 's' and the
 * next character 'ch', returns the next state, or s_http_host_dead when
 * 'ch' is not valid in state 's'.
 */
static enum http_host_state
http_parse_host_char(enum http_host_state s, const char ch) {
switch(s) {
case s_http_userinfo:
case s_http_userinfo_start:
/* '@' ends the user info; the host starts right after it. */
if (ch == '@') {
return s_http_host_start;
}
if (IS_USERINFO_CHAR(ch)) {
return s_http_userinfo;
}
break;
case s_http_host_start:
/* '[' opens an IPv6 literal. */
if (ch == '[') {
return s_http_host_v6_start;
}
if (IS_HOST_CHAR(ch)) {
return s_http_host;
}
break;
case s_http_host:
if (IS_HOST_CHAR(ch)) {
return s_http_host;
}
/* fall through */
case s_http_host_v6_end:
/* After a host name or a closing ']' only ":port" may follow. */
if (ch == ':') {
return s_http_host_port_start;
}
break;
case s_http_host_v6:
if (ch == ']') {
return s_http_host_v6_end;
}
/* fall through */
case s_http_host_v6_start:
if (IS_HEX(ch) || ch == ':' || ch == '.') {
return s_http_host_v6;
}
/* A zone id may only start after at least one address character. */
if (s == s_http_host_v6 && ch == '%') {
return s_http_host_v6_zone_start;
}
break;
case s_http_host_v6_zone:
if (ch == ']') {
return s_http_host_v6_end;
}
/* fall through */
case s_http_host_v6_zone_start:
/* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
ch == '~') {
return s_http_host_v6_zone;
}
break;
case s_http_host_port:
case s_http_host_port_start:
if (IS_NUM(ch)) {
return s_http_host_port;
}
break;
default:
break;
}
return s_http_host_dead;
}
/*
 * Split the server part of a URL into user info, host and port.
 *
 * 'buf'      the URL buffer the offsets in 'u' refer to.
 * 'u'        on entry, UF_HOST must be set and cover the whole server
 *            part; on return UF_HOST covers the host only (without the
 *            brackets of an IPv6 literal), and UF_PORT / UF_USERINFO are
 *            set when those parts are present.
 * 'found_at' non-zero when the server part contains '@', i.e. it starts
 *            with user info.
 *
 * Returns 0 on success and 1 when the server part is malformed or
 * incomplete.
 */
static int
http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
enum http_host_state s;
const char *p;
/* End (exclusive) of the server part, taken before UF_HOST is rewritten. */
size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
assert(u->field_set & (1 << UF_HOST));
/* The host length is recounted below. */
u->field_data[UF_HOST].len = 0;
s = found_at ? s_http_userinfo_start : s_http_host_start;
for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
enum http_host_state new_s = http_parse_host_char(s, *p);
if (new_s == s_http_host_dead) {
return 1;
}
switch(new_s) {
case s_http_host:
/* Entering the host: record where it starts. */
if (s != s_http_host) {
u->field_data[UF_HOST].off = (uint16_t)(p - buf);
}
u->field_data[UF_HOST].len++;
break;
case s_http_host_v6:
if (s != s_http_host_v6) {
u->field_data[UF_HOST].off = (uint16_t)(p - buf);
}
u->field_data[UF_HOST].len++;
break;
case s_http_host_v6_zone_start:
case s_http_host_v6_zone:
/* The zone id, including its '%', counts as part of the host. */
u->field_data[UF_HOST].len++;
break;
case s_http_host_port:
if (s != s_http_host_port) {
u->field_data[UF_PORT].off = (uint16_t)(p - buf);
u->field_data[UF_PORT].len = 0;
u->field_set |= (1 << UF_PORT);
}
u->field_data[UF_PORT].len++;
break;
case s_http_userinfo:
if (s != s_http_userinfo) {
u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
u->field_data[UF_USERINFO].len = 0;
u->field_set |= (1 << UF_USERINFO);
}
u->field_data[UF_USERINFO].len++;
break;
default:
break;
}
s = new_s;
}
/* Make sure we don't end somewhere unexpected */
switch (s) {
case s_http_host_start:
case s_http_host_v6_start:
case s_http_host_v6:
case s_http_host_v6_zone_start:
case s_http_host_v6_zone:
case s_http_host_port_start:
case s_http_userinfo:
case s_http_userinfo_start:
return 1;
default:
break;
}
return 0;
}
/* Reset every member of 'u' (field mask, port and all field ranges) to 0. */
void
http_parser_url_init(struct http_parser_url *u)
{
  memset(u, 0, sizeof(struct http_parser_url));
}
/*
 * Parse the URL in buf[0 .. buflen) and record in 'u' which components are
 * present and where they are located.
 *
 * 'is_connect' non-zero: the input is the target of a CONNECT request and
 *              must consist of exactly "host:port".
 * 'u'          result; 'field_set' and 'port' are reset here, and
 *              'field_data' entries are valid only for fields whose bit is
 *              set in 'field_set'.
 *
 * Returns 0 on success and 1 on failure: empty or over-long input, an
 * invalid character, a scheme without host, a malformed server part or a
 * port above 65535.
 *
 * The port conversion loop now advances its own cursor. It used to
 * increment the outer cursor instead, so it never moved on: nonzero ports
 * were rejected and a port starting with '0' never terminated.
 */
int
http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
                      struct http_parser_url *u)
{
  enum state s;
  const char *p;
  enum http_parser_url_fields uf, old_uf;
  int found_at = 0;

  if (buflen == 0) {
    return 1;
  }

  /* Offsets and lengths are stored as uint16_t; longer input would wrap. */
  if (buflen > 0xffff) {
    return 1;
  }

  u->port = u->field_set = 0;
  s = is_connect ? s_req_server_start : s_req_spaces_before_url;
  old_uf = UF_MAX;

  for (p = buf; p < buf + buflen; p++) {
    s = parse_url_char(s, *p);

    /* Figure out the next field that we're operating on */
    switch (s) {
      case s_dead:
        return 1;

      /* Skip delimiters */
      case s_req_schema_slash:
      case s_req_schema_slash_slash:
      case s_req_server_start:
      case s_req_query_string_start:
      case s_req_fragment_start:
        continue;

      case s_req_schema:
        uf = UF_SCHEMA;
        break;

      case s_req_server_with_at:
        found_at = 1;
        /* fall through */
      case s_req_server:
        uf = UF_HOST;
        break;

      case s_req_path:
        uf = UF_PATH;
        break;

      case s_req_query_string:
        uf = UF_QUERY;
        break;

      case s_req_fragment:
        uf = UF_FRAGMENT;
        break;

      default:
        assert(!"Unexpected state");
        return 1;
    }

    /* Nothing's changed; soldier on */
    if (uf == old_uf) {
      u->field_data[uf].len++;
      continue;
    }

    /* First character of a new field: start a fresh range. */
    u->field_data[uf].off = (uint16_t)(p - buf);
    u->field_data[uf].len = 1;

    u->field_set |= (1 << uf);
    old_uf = uf;
  }

  /* host must be present if there is a schema */
  /* parsing http:///toto will fail */
  if ((u->field_set & (1 << UF_SCHEMA)) &&
      (u->field_set & (1 << UF_HOST)) == 0) {
    return 1;
  }

  /* Second pass: split the server part into user info, host and port. */
  if (u->field_set & (1 << UF_HOST)) {
    if (http_parse_host(buf, u, found_at) != 0) {
      return 1;
    }
  }

  /* CONNECT requests can only contain "hostname:port" */
  if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
    return 1;
  }

  if (u->field_set & (1 << UF_PORT)) {
    uint16_t off;
    uint16_t len;
    const char* pp;
    const char* end;
    unsigned long v;

    off = u->field_data[UF_PORT].off;
    len = u->field_data[UF_PORT].len;
    end = buf + off + len;

    /* NOTE: The characters are already validated and are in the [0-9] range */
    assert((size_t)off + len <= buflen && "Port number overflow");
    v = 0;
    for (pp = buf + off; pp < end; pp++) {
      v *= 10;
      v += (unsigned long)(*pp - '0');

      /* Ports have a max value of 2^16 - 1 */
      if (v > 0xffff) {
        return 1;
      }
    }

    u->port = (uint16_t) v;
  }

  return 0;
}

View File

@@ -0,0 +1,94 @@
/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef url_parser_h
#define url_parser_h
#ifdef __cplusplus
extern "C" {
#endif
/* Also update SONAME in the Makefile whenever you change these. */
#define HTTP_PARSER_VERSION_MAJOR 2
#define HTTP_PARSER_VERSION_MINOR 9
#define HTTP_PARSER_VERSION_PATCH 1
#include <stddef.h>
#if defined(_WIN32) && !defined(__MINGW32__) && \
(!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__)
#include <BaseTsd.h>
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif
/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run
* faster
*/
#ifndef HTTP_PARSER_STRICT
# define HTTP_PARSER_STRICT 1
#endif
/*
 * URL components. Each value is both the index into
 * http_parser_url.field_data[] and the bit number in
 * http_parser_url.field_set. UF_MAX is the number of components, not a
 * component itself.
 */
enum http_parser_url_fields
{ UF_SCHEMA = 0
, UF_HOST = 1
, UF_PORT = 2
, UF_PATH = 3
, UF_QUERY = 4
, UF_FRAGMENT = 5
, UF_USERINFO = 6
, UF_MAX = 7
};
/* Result structure for http_parser_parse_url().
*
* Callers should index into field_data[] with UF_* values iff field_set
* has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
* because we probably have padding left over), we convert any port to
* a uint16_t.
*
* Offsets and lengths are 16 bits wide, so they cannot describe positions
* beyond 65535 bytes into the parsed buffer.
*/
struct http_parser_url {
uint16_t field_set; /* Bitmask of (1 << UF_*) values */
uint16_t port; /* Converted UF_PORT string */
struct {
uint16_t off; /* Offset into buffer in which field starts */
uint16_t len; /* Length of run in buffer */
} field_data[UF_MAX];
};
/* Initialize all http_parser_url members to 0 */
void http_parser_url_init(struct http_parser_url *u);
/* Parse a URL; return nonzero on failure
 *
 * buf/buflen:  the URL text; it does not need to be NUL-terminated.
 * is_connect:  non-zero when the text is the target of a CONNECT request,
 *              which must consist of "host:port" only.
 * u:           result; the parser resets 'field_set' and 'port' itself,
 *              but leaves 'field_data' entries of absent fields untouched.
 */
int http_parser_parse_url(const char *buf, size_t buflen,
int is_connect,
struct http_parser_url *u);
#ifdef __cplusplus
}
#endif
#endif