diff --git a/config.m4 b/config.m4 index eca408c..5309c51 100644 --- a/config.m4 +++ b/config.m4 @@ -2,23 +2,6 @@ PHP_ARG_ENABLE(mailparse, whether to enable mailparse support, [ --enable-mailparse Enable mailparse support.]) if test "$PHP_MAILPARSE" != "no"; then - if test "$ext_shared" != "yes" && test "$enable_mbstring" != "yes"; then - AC_MSG_WARN(Activating mbstring) - enable_mbstring=yes - fi - - AC_MSG_CHECKING(libmbfl headers) - if test -f $abs_srcdir/ext/mbstring/libmbfl/mbfl/mbfilter.h; then - dnl build in php-src tree - AC_MSG_RESULT(found in $abs_srcdir/ext/mbstring) - elif test -f $phpincludedir/ext/mbstring/libmbfl/mbfl/mbfilter.h; then - dnl build alone - AC_MSG_RESULT(found in $phpincludedir/ext/mbstring) - else - AC_MSG_ERROR(mbstring extension with libmbfl is missing) - fi - - PHP_NEW_EXTENSION(mailparse, mailparse.c php_mailparse_mime.c php_mailparse_rfc822.c, $ext_shared) - PHP_ADD_EXTENSION_DEP(mailparse, mbstring, true) + PHP_NEW_EXTENSION(mailparse, mailparse.c php_mailparse_mime.c php_mailparse_rfc822.c mailparse_encoding.c, $ext_shared) PHP_ADD_MAKEFILE_FRAGMENT fi diff --git a/config.w32 b/config.w32 index 772d372..383bec8 100644 --- a/config.w32 +++ b/config.w32 @@ -3,17 +3,9 @@ ARG_ENABLE("mailparse", "MAILPARSE support", "no"); if (PHP_MAILPARSE != "no") { + EXTENSION('mailparse', 'mailparse.c php_mailparse_mime.c php_mailparse_rfc822.c mailparse_encoding.c'); + AC_DEFINE('HAVE_MAILPARSE', 1); - if (PHP_MBSTRING == "no") { - WARNING("mailparse requires mbstring"); - } else { - EXTENSION('mailparse', 'mailparse.c php_mailparse_mime.c php_mailparse_rfc822.c'); - AC_DEFINE('HAVE_MAILPARSE', 1); - - -// MFO.WriteLine(configure_module_dirname + "\\php_mailparse_rfc822.c: " + configure_module_dirname + "\\php_mailparse_rfc822.re"); -// MFO.WriteLine("\t$(RE2C) -b " + configure_module_dirname + "\\php_mailparse_rfc822.re > " + configure_module_dirname + "\\php_mailparse_rfc822.c"); - - ADD_EXTENSION_DEP('mailparse', 'mbstring'); - } +// MFO.WriteLine(configure_module_dirname + "\\php_mailparse_rfc822.c: " + configure_module_dirname + "\\php_mailparse_rfc822.re"); +// MFO.WriteLine("\t$(RE2C) -b " + configure_module_dirname + "\\php_mailparse_rfc822.re > " + configure_module_dirname + "\\php_mailparse_rfc822.c"); } diff --git a/mailparse.c b/mailparse.c index 5dac132..ba65418 100644 --- a/mailparse.c +++ b/mailparse.c @@ -28,11 +28,6 @@ #include "arginfo.h" -/* just in case the config check doesn't enable mbstring automatically */ -#if !HAVE_MBSTRING -#error The mailparse extension requires the mbstring extension! -#endif - #define MAILPARSE_DECODE_NONE 0 /* include headers and leave section untouched */ #define MAILPARSE_DECODE_8BIT 1 /* decode body into 8-bit */ #define MAILPARSE_DECODE_NOHEADERS 2 /* don't include the headers */ @@ -92,15 +87,8 @@ zend_function_entry mailparse_functions[] = { PHP_FE_END }; -static const zend_module_dep mailparse_deps[] = { - ZEND_MOD_REQUIRED("mbstring") - ZEND_MOD_END -}; - zend_module_entry mailparse_module_entry = { - STANDARD_MODULE_HEADER_EX, - NULL, - mailparse_deps, + STANDARD_MODULE_HEADER, "mailparse", mailparse_functions, PHP_MINIT(mailparse), @@ -914,7 +902,7 @@ PHP_FUNCTION(mailparse_determine_best_xfer_encoding) int longline = 0; int linelen = 0; int c; - enum mbfl_no_encoding bestenc = mbfl_no_encoding_7bit; + enum mb_no_encoding bestenc = mb_no_encoding_7bit; php_stream *stream; char * name; @@ -930,9 +918,9 @@ PHP_FUNCTION(mailparse_determine_best_xfer_encoding) if (c == EOF) break; if (c > 0x80) - bestenc = mbfl_no_encoding_8bit; + bestenc = mb_no_encoding_8bit; else if (c == 0) { - bestenc = mbfl_no_encoding_base64; + bestenc = mb_no_encoding_base64; longline = 0; break; } @@ -942,20 +930,28 @@ PHP_FUNCTION(mailparse_determine_best_xfer_encoding) longline = 1; } if (longline) { - bestenc = mbfl_no_encoding_qprint; + bestenc = mb_no_encoding_qprint; } php_stream_rewind(stream); -#if PHP_VERSION_ID < 80100 - name = (char *)mbfl_no2preferred_mime_name(bestenc); -#else - name = (char *)mbfl_encoding_preferred_mime_name(mbfl_no2encoding(bestenc)); -#endif - if (name) { - RETVAL_STRING(name); - } else { - RETVAL_FALSE; + /* Get the MIME name for the encoding */ + switch (bestenc) { + case mb_no_encoding_7bit: + name = "7bit"; + break; + case mb_no_encoding_8bit: + name = "8bit"; + break; + case mb_no_encoding_base64: + name = "BASE64"; + break; + case mb_no_encoding_qprint: + name = "quoted-printable"; + break; + default: + RETURN_FALSE; } + RETVAL_STRING(name); } /* }}} */ @@ -981,9 +977,9 @@ PHP_FUNCTION(mailparse_stream_encode) char *buf; size_t len; size_t bufsize = 2048; - const mbfl_encoding *encoding; - enum mbfl_no_encoding enc; - mbfl_convert_filter *conv = NULL; + const mb_encoding *encoding; + enum mb_no_encoding enc; + mb_convert_filter *conv = NULL; if (zend_parse_parameters(ZEND_NUM_ARGS(), "rrS", &srcfile, &destfile, &encod) == FAILURE) { RETURN_FALSE; @@ -999,7 +995,7 @@ PHP_FUNCTION(mailparse_stream_encode) php_stream_from_zval(srcstream, srcfile); php_stream_from_zval(deststream, destfile); - encoding = mbfl_name2encoding(ZSTR_VAL(encod)); + encoding = mb_name2encoding(ZSTR_VAL(encod)); if (encoding) { enc = encoding->no_encoding; } else { @@ -1013,14 +1009,14 @@ PHP_FUNCTION(mailparse_stream_encode) buf = emalloc(bufsize); RETVAL_TRUE; - conv = mbfl_convert_filter_new(mbfl_no2encoding(mbfl_no_encoding_8bit), - mbfl_no2encoding(enc), + conv = mb_convert_filter_new(mb_no2encoding(mb_no_encoding_8bit), + mb_no2encoding(enc), mailparse_stream_output, mailparse_stream_flush, deststream ); - if (enc == mbfl_no_encoding_qprint) { + if (enc == mb_no_encoding_qprint) { /* If the qp encoded section is going to be digitally signed, * it is a good idea to make sure that lines that begin "From " * have the letter F encoded, so that MTAs do not stick a > character @@ -1032,7 +1028,7 @@ PHP_FUNCTION(mailparse_stream_encode) len = strlen(buf); if (strncmp(buf, "From ", 5) == 0) { - mbfl_convert_filter_flush(conv); + mb_convert_filter_flush(conv); php_stream_write(deststream, "=46rom ", 7); i = 5; } else { @@ -1040,7 +1036,7 @@ PHP_FUNCTION(mailparse_stream_encode) } for (; i + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* Forward declarations */ +static int mb_filt_conv_base64enc(int c, mb_convert_filter *filter); +static int mb_filt_conv_base64enc_flush(mb_convert_filter *filter); +static int mb_filt_conv_base64dec(int c, mb_convert_filter *filter); +static int mb_filt_conv_base64dec_flush(mb_convert_filter *filter); +static int mb_filt_conv_qprintenc(int c, mb_convert_filter *filter); +static int mb_filt_conv_qprintenc_flush(mb_convert_filter *filter); +static int mb_filt_conv_qprintdec(int c, mb_convert_filter *filter); +static int mb_filt_conv_qprintdec_flush(mb_convert_filter *filter); +static void mb_filt_conv_common_ctor(mb_convert_filter *filter); + +/* Encoding definitions */ +static const mb_encoding mb_encoding_7bit = { mb_no_encoding_7bit, "7bit" }; +static const mb_encoding mb_encoding_8bit = { mb_no_encoding_8bit, "8bit" }; +static const mb_encoding mb_encoding_base64 = { mb_no_encoding_base64, "BASE64" }; +static const mb_encoding mb_encoding_qprint = { mb_no_encoding_qprint, "Quoted-Printable" }; + +/* Virtual tables for conversions */ +static const mb_convert_vtbl vtbl_8bit_b64 = { + mb_no_encoding_8bit, + mb_no_encoding_base64, + mb_filt_conv_common_ctor, + NULL, + mb_filt_conv_base64enc, + mb_filt_conv_base64enc_flush +}; + +static const mb_convert_vtbl vtbl_b64_8bit = { + mb_no_encoding_base64, + mb_no_encoding_8bit, + mb_filt_conv_common_ctor, + NULL, + mb_filt_conv_base64dec, + mb_filt_conv_base64dec_flush +}; + +static const mb_convert_vtbl vtbl_8bit_qprint = { + mb_no_encoding_8bit, + mb_no_encoding_qprint, + mb_filt_conv_common_ctor, + NULL, + mb_filt_conv_qprintenc, + mb_filt_conv_qprintenc_flush +}; + +static const mb_convert_vtbl vtbl_qprint_8bit = { + mb_no_encoding_qprint, + mb_no_encoding_8bit, + mb_filt_conv_common_ctor, + NULL, + mb_filt_conv_qprintdec, + mb_filt_conv_qprintdec_flush +}; + +/* ============================================================================= + * BASE64 encoding/decoding + * ============================================================================= */ + +static const unsigned char mb_base64_table[] = { + /* 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', */ + 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d, + /* 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', */ + 0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a, + /* 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', */ + 0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d, + /* 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', */ + 0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a, + /* '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0' */ + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x2b,0x2f,0x00 +}; + +/* any => BASE64 */ +static int mb_filt_conv_base64enc(int c, mb_convert_filter *filter) +{ + int n; + + n = (filter->status & 0xff); + if (n == 0) { + filter->status++; + filter->cache = (c & 0xff) << 16; + } else if (n == 1) { + filter->status++; + filter->cache |= (c & 0xff) << 8; + } else { + filter->status &= ~0xff; + n = (filter->status & 0xff00) >> 8; + if (n > 72) { + CK((*filter->output_function)(0x0d, filter->data)); /* CR */ + CK((*filter->output_function)(0x0a, filter->data)); /* LF */ + filter->status &= ~0xff00; + } + filter->status += 0x400; + n = filter->cache | (c & 0xff); + CK((*filter->output_function)(mb_base64_table[(n >> 18) & 0x3f], filter->data)); + CK((*filter->output_function)(mb_base64_table[(n >> 12) & 0x3f], filter->data)); + CK((*filter->output_function)(mb_base64_table[(n >> 6) & 0x3f], filter->data)); + CK((*filter->output_function)(mb_base64_table[n & 0x3f], filter->data)); + } + + return 0; +} + +static int mb_filt_conv_base64enc_flush(mb_convert_filter *filter) +{ + int status, cache, len; + + status = filter->status & 0xff; + cache = filter->cache; + len = (filter->status & 0xff00) >> 8; + filter->status &= ~0xffff; + filter->cache = 0; + /* flush fragments */ + if (status >= 1) { + if (len > 72){ + CK((*filter->output_function)(0x0d, filter->data)); /* CR */ + CK((*filter->output_function)(0x0a, filter->data)); /* LF */ + } + CK((*filter->output_function)(mb_base64_table[(cache >> 18) & 0x3f], filter->data)); + CK((*filter->output_function)(mb_base64_table[(cache >> 12) & 0x3f], filter->data)); + if (status == 1) { + CK((*filter->output_function)(0x3d, filter->data)); /* '=' */ + CK((*filter->output_function)(0x3d, filter->data)); /* '=' */ + } else { + CK((*filter->output_function)(mb_base64_table[(cache >> 6) & 0x3f], filter->data)); + CK((*filter->output_function)(0x3d, filter->data)); /* '=' */ + } + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +/* BASE64 => any */ +static int mb_filt_conv_base64dec(int c, mb_convert_filter *filter) +{ + int n; + + if (c == 0x0d || c == 0x0a || c == 0x20 || c == 0x09 || c == 0x3d) { /* CR or LF or SPACE or HTAB or '=' */ + return 0; + } + + n = 0; + if (c >= 0x41 && c <= 0x5a) { /* A - Z */ + n = c - 65; + } else if (c >= 0x61 && c <= 0x7a) { /* a - z */ + n = c - 71; + } else if (c >= 0x30 && c <= 0x39) { /* 0 - 9 */ + n = c + 4; + } else if (c == 0x2b) { /* '+' */ + n = 62; + } else if (c == 0x2f) { /* '/' */ + n = 63; + } else { + /* Invalid character - output a marker but continue */ + return 0; + } + n &= 0x3f; + + switch (filter->status) { + case 0: + filter->status = 1; + filter->cache = n << 18; + break; + case 1: + filter->status = 2; + filter->cache |= n << 12; + break; + case 2: + filter->status = 3; + filter->cache |= n << 6; + break; + default: + filter->status = 0; + n |= filter->cache; + CK((*filter->output_function)((n >> 16) & 0xff, filter->data)); + CK((*filter->output_function)((n >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(n & 0xff, filter->data)); + break; + } + + return 0; +} + +static int mb_filt_conv_base64dec_flush(mb_convert_filter *filter) +{ + int status, cache; + + status = filter->status; + cache = filter->cache; + filter->status = 0; + filter->cache = 0; + /* flush fragments */ + if (status >= 2) { + CK((*filter->output_function)((cache >> 16) & 0xff, filter->data)); + if (status >= 3) { + CK((*filter->output_function)((cache >> 8) & 0xff, filter->data)); + } + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +/* ============================================================================= + * Quoted-Printable encoding/decoding + * ============================================================================= */ + +static int hex2code_map[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}; + +/* any => Quoted-Printable */ +static int mb_filt_conv_qprintenc(int c, mb_convert_filter *filter) +{ + int s, n; + + switch (filter->status & 0xff) { + case 0: + filter->cache = c; + filter->status++; + break; + default: + s = filter->cache; + filter->cache = c; + n = (filter->status & 0xff00) >> 8; + + if (s == 0) { /* null */ + CK((*filter->output_function)(s, filter->data)); + filter->status &= ~0xff00; + break; + } + + if (s == '\n' || (s == '\r' && c != '\n')) { /* line feed */ + CK((*filter->output_function)('\r', filter->data)); + CK((*filter->output_function)('\n', filter->data)); + filter->status &= ~0xff00; + break; + } else if (s == 0x0d) { + break; + } + + if (n >= 72) { /* soft line feed */ + CK((*filter->output_function)('=', filter->data)); + CK((*filter->output_function)('\r', filter->data)); + CK((*filter->output_function)('\n', filter->data)); + filter->status &= ~0xff00; + } + + if (s <= 0 || s >= 0x80 || s == '=') { /* not ASCII or '=' */ + /* hex-octet */ + CK((*filter->output_function)('=', filter->data)); + n = (s >> 4) & 0xf; + if (n < 10) { + n += 48; /* '0' */ + } else { + n += 55; /* 'A' - 10 */ + } + CK((*filter->output_function)(n, filter->data)); + n = s & 0xf; + if (n < 10) { + n += 48; + } else { + n += 55; + } + CK((*filter->output_function)(n, filter->data)); + filter->status += 0x300; + } else { + CK((*filter->output_function)(s, filter->data)); + filter->status += 0x100; + } + break; + } + + return 0; +} + +static int mb_filt_conv_qprintenc_flush(mb_convert_filter *filter) +{ + /* flush filter cache */ + (*filter->filter_function)('\0', filter); + filter->status &= ~0xffff; + filter->cache = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +/* Quoted-Printable => any */ +static int mb_filt_conv_qprintdec(int c, mb_convert_filter *filter) +{ + int n, m; + + switch (filter->status) { + case 1: + if (hex2code_map[c & 0xff] >= 0) { + filter->cache = c; + filter->status = 2; + } else if (c == 0x0d) { /* soft line feed */ + filter->status = 3; + } else if (c == 0x0a) { /* soft line feed */ + filter->status = 0; + } else { + CK((*filter->output_function)(0x3d, filter->data)); /* '=' */ + CK((*filter->output_function)(c, filter->data)); + filter->status = 0; + } + break; + case 2: + m = hex2code_map[c & 0xff]; + if (m < 0) { + CK((*filter->output_function)(0x3d, filter->data)); /* '=' */ + CK((*filter->output_function)(filter->cache, filter->data)); + n = c; + } else { + n = hex2code_map[filter->cache] << 4 | m; + } + CK((*filter->output_function)(n, filter->data)); + filter->status = 0; + break; + case 3: + if (c != 0x0a) { /* LF */ + CK((*filter->output_function)(c, filter->data)); + } + filter->status = 0; + break; + default: + if (c == 0x3d) { /* '=' */ + filter->status = 1; + } else { + CK((*filter->output_function)(c, filter->data)); + } + break; + } + + return 0; +} + +static int mb_filt_conv_qprintdec_flush(mb_convert_filter *filter) +{ + int status, cache; + + status = filter->status; + cache = filter->cache; + filter->status = 0; + filter->cache = 0; + /* flush fragments */ + if (status == 1) { + CK((*filter->output_function)(0x3d, filter->data)); /* '=' */ + } else if (status == 2) { + CK((*filter->output_function)(0x3d, filter->data)); /* '=' */ + CK((*filter->output_function)(cache, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +/* ============================================================================= + * Filter infrastructure + * ============================================================================= */ + +static void mb_filt_conv_common_ctor(mb_convert_filter *filter) +{ + filter->status = 0; + filter->cache = 0; +} + +static const mb_convert_vtbl* mb_convert_filter_get_vtbl(const mb_encoding *from, const mb_encoding *to) +{ + if (from->no_encoding == mb_no_encoding_8bit && to->no_encoding == mb_no_encoding_base64) { + return &vtbl_8bit_b64; + } else if (from->no_encoding == mb_no_encoding_base64 && to->no_encoding == mb_no_encoding_8bit) { + return &vtbl_b64_8bit; + } else if (from->no_encoding == mb_no_encoding_8bit && to->no_encoding == mb_no_encoding_qprint) { + return &vtbl_8bit_qprint; + } else if (from->no_encoding == mb_no_encoding_qprint && to->no_encoding == mb_no_encoding_8bit) { + return &vtbl_qprint_8bit; + } + return NULL; +} + +static void mb_convert_filter_init(mb_convert_filter *filter, const mb_encoding *from, const mb_encoding *to, + const mb_convert_vtbl *vtbl, mb_output_function_t output_function, mb_flush_function_t flush_function, void* data) +{ + filter->from = from; + filter->to = to; + filter->output_function = output_function; + filter->flush_function = flush_function; + filter->data = data; + filter->filter_dtor = vtbl->filter_dtor; + filter->filter_function = vtbl->filter_function; + filter->filter_flush = vtbl->filter_flush; + + if (vtbl->filter_ctor) { + (*vtbl->filter_ctor)(filter); + } +} + +mb_convert_filter* mb_convert_filter_new(const mb_encoding *from, const mb_encoding *to, + mb_output_function_t output_function, mb_flush_function_t flush_function, void* data) +{ + const mb_convert_vtbl *vtbl = mb_convert_filter_get_vtbl(from, to); + if (vtbl == NULL) { + return NULL; + } + + mb_convert_filter *filter = emalloc(sizeof(mb_convert_filter)); + mb_convert_filter_init(filter, from, to, vtbl, output_function, flush_function, data); + return filter; +} + +void mb_convert_filter_delete(mb_convert_filter *filter) +{ + if (filter->filter_dtor) { + (*filter->filter_dtor)(filter); + } + efree(filter); +} + +int mb_convert_filter_feed(int c, mb_convert_filter *filter) +{ + return (*filter->filter_function)(c, filter); +} + +int mb_convert_filter_flush(mb_convert_filter *filter) +{ + (*filter->filter_flush)(filter); + return 0; +} + +/* ============================================================================= + * Encoding lookup functions + * ============================================================================= */ + +const mb_encoding* mb_name2encoding(const char *name) +{ + if (name == NULL) { + return NULL; + } + + /* Case-insensitive comparison for encoding names */ + if (strcasecmp(name, "base64") == 0 || strcasecmp(name, "BASE64") == 0) { + return &mb_encoding_base64; + } else if (strcasecmp(name, "quoted-printable") == 0 || strcasecmp(name, "qprint") == 0) { + return &mb_encoding_qprint; + } else if (strcasecmp(name, "8bit") == 0) { + return &mb_encoding_8bit; + } else if (strcasecmp(name, "7bit") == 0) { + return &mb_encoding_7bit; + } + + return NULL; +} + +const mb_encoding* mb_no2encoding(enum mb_no_encoding no_encoding) +{ + switch (no_encoding) { + case mb_no_encoding_base64: + return &mb_encoding_base64; + case mb_no_encoding_qprint: + return &mb_encoding_qprint; + case mb_no_encoding_8bit: + return &mb_encoding_8bit; + case mb_no_encoding_7bit: + return &mb_encoding_7bit; + default: + return NULL; + } +} diff --git a/mailparse_encoding.h b/mailparse_encoding.h new file mode 100644 index 0000000..0ae4b77 --- /dev/null +++ b/mailparse_encoding.h @@ -0,0 +1,98 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available at through the world-wide-web at | + | http://www.php.net/license/3_01.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +/* + * Vendored encoding support for mailparse + * + * This code is derived from libmbfl, as modified in the mbstring extension, + * and has been adapted for standalone use in mailparse. + * + * Original libmbfl code copyright (c) 1998-2002 HappySize, Inc. + * Licensed under GNU Lesser General Public License (version 2) + */ + +#ifndef MAILPARSE_ENCODING_H +#define MAILPARSE_ENCODING_H + +#include "php.h" + +/* Encoding identifiers */ +enum mb_no_encoding { + mb_no_encoding_invalid = -1, + mb_no_encoding_7bit, + mb_no_encoding_8bit, + mb_no_encoding_base64, + mb_no_encoding_qprint +}; + +/* Forward declarations */ +typedef struct _mb_convert_filter mb_convert_filter; +typedef struct _mb_encoding mb_encoding; +typedef struct mb_convert_vtbl mb_convert_vtbl; + +/* Function pointer types */ +typedef int (*mb_output_function_t)(int c, void *data); +typedef int (*mb_flush_function_t)(void *data); +typedef int (*mb_filter_function_t)(int c, mb_convert_filter *filter); +typedef int (*mb_filter_flush_t)(mb_convert_filter *filter); +typedef void (*mb_filter_ctor_t)(mb_convert_filter *filter); +typedef void (*mb_filter_dtor_t)(mb_convert_filter *filter); + +/* Encoding structure */ +struct _mb_encoding { + enum mb_no_encoding no_encoding; + const char *name; +}; + +/* Virtual table for convert filters */ +struct mb_convert_vtbl { + enum mb_no_encoding from; + enum mb_no_encoding to; + mb_filter_ctor_t filter_ctor; + mb_filter_dtor_t filter_dtor; + mb_filter_function_t filter_function; + mb_filter_flush_t filter_flush; +}; + +/* Convert filter structure */ +struct _mb_convert_filter { + mb_filter_dtor_t filter_dtor; + mb_filter_function_t filter_function; + mb_filter_flush_t filter_flush; + mb_output_function_t output_function; + mb_flush_function_t flush_function; + void *data; + int status; + int cache; + const mb_encoding *from; + const mb_encoding *to; +}; + +/* Public API functions */ +mb_convert_filter* mb_convert_filter_new( + const mb_encoding *from, + const mb_encoding *to, + mb_output_function_t output_function, + mb_flush_function_t flush_function, + void *data +); + +void mb_convert_filter_delete(mb_convert_filter *filter); +int mb_convert_filter_feed(int c, mb_convert_filter *filter); +int mb_convert_filter_flush(mb_convert_filter *filter); + +const mb_encoding* mb_name2encoding(const char *name); +const mb_encoding* mb_no2encoding(enum mb_no_encoding no_encoding); + +#endif /* MAILPARSE_ENCODING_H */ diff --git a/php_mailparse.h b/php_mailparse.h index e8f5584..bf1ed13 100644 --- a/php_mailparse.h +++ b/php_mailparse.h @@ -76,8 +76,7 @@ PHP_METHOD(mimemessage, extract_uue); PHP_METHOD(mimemessage, remove); PHP_METHOD(mimemessage, add_child); -# include "ext/mbstring/libmbfl/mbfl/mbfilter.h" - +#include "mailparse_encoding.h" #include "php_mailparse_rfc822.h" #include "php_mailparse_mime.h" diff --git a/php_mailparse_mime.c b/php_mailparse_mime.c index 87794b0..afa28bd 100644 --- a/php_mailparse_mime.c +++ b/php_mailparse_mime.c @@ -914,20 +914,20 @@ static int filter_into_work_buffer(int c, void *dat) PHP_MAILPARSE_API void php_mimepart_decoder_prepare(php_mimepart *part, int do_decode, php_mimepart_extract_func_t decoder, void *ptr) { - const mbfl_encoding *encoding; - enum mbfl_no_encoding from = mbfl_no_encoding_8bit; + const mb_encoding *encoding; + enum mb_no_encoding from = mb_no_encoding_8bit; if (do_decode && part->content_transfer_encoding) { - encoding = mbfl_name2encoding(part->content_transfer_encoding); + encoding = mb_name2encoding(part->content_transfer_encoding); if (encoding) { from = encoding->no_encoding; } else { if (strcasecmp("binary", part->content_transfer_encoding) != 0) { - zend_error(E_WARNING, "%s(): mbstring doesn't know how to decode %s transfer encoding!", + zend_error(E_WARNING, "%s(): unknown transfer encoding %s!", get_active_function_name(), part->content_transfer_encoding); } - from = mbfl_no_encoding_8bit; + from = mb_no_encoding_8bit; } } @@ -936,11 +936,11 @@ PHP_MAILPARSE_API void php_mimepart_decoder_prepare(php_mimepart *part, int do_d part->parsedata.workbuf.len = 0; if (do_decode) { - if (from == mbfl_no_encoding_8bit || from == mbfl_no_encoding_7bit) { + if (from == mb_no_encoding_8bit || from == mb_no_encoding_7bit) { part->extract_filter = NULL; } else { - part->extract_filter = mbfl_convert_filter_new( - mbfl_no2encoding(from), mbfl_no2encoding(mbfl_no_encoding_8bit), + part->extract_filter = mb_convert_filter_new( + mb_no2encoding(from), mb_no2encoding(mb_no_encoding_8bit), filter_into_work_buffer, NULL, part @@ -953,8 +953,8 @@ PHP_MAILPARSE_API void php_mimepart_decoder_prepare(php_mimepart *part, int do_d PHP_MAILPARSE_API void php_mimepart_decoder_finish(php_mimepart *part) { if (part->extract_filter) { - mbfl_convert_filter_flush(part->extract_filter); - mbfl_convert_filter_delete(part->extract_filter); + mb_convert_filter_flush(part->extract_filter); + mb_convert_filter_delete(part->extract_filter); } if (part->extract_func && part->parsedata.workbuf.len > 0) { part->extract_func(part, part->extract_context, part->parsedata.workbuf.c, part->parsedata.workbuf.len); @@ -969,7 +969,7 @@ PHP_MAILPARSE_API int php_mimepart_decoder_feed(php_mimepart *part, const char * if (part->extract_filter) { for (i = 0; i < bufsize; i++) { - if (mbfl_convert_filter_feed(buf[i], part->extract_filter) < 0) { + if (mb_convert_filter_feed(buf[i], part->extract_filter) < 0) { zend_error(E_WARNING, "%s() - filter conversion failed. Input message is probably incorrectly encoded\n", get_active_function_name()); return -1; diff --git a/php_mailparse_mime.h b/php_mailparse_mime.h index 38c5d40..b96aa1f 100644 --- a/php_mailparse_mime.h +++ b/php_mailparse_mime.h @@ -62,7 +62,7 @@ struct _php_mimepart { /* these are used during part extraction */ php_mimepart_extract_func_t extract_func; - mbfl_convert_filter *extract_filter; + mb_convert_filter *extract_filter; void *extract_context; /* these are used during parsing */