/*
 * multibytecodec.c: Common Multibyte Codec Implementation
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
| #define PY_SSIZE_T_CLEAN | |
| #include "Python.h" | |
| #include "structmember.h" | |
| #include "multibytecodec.h" | |
| typedef struct { | |
| const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end; | |
| unsigned char *outbuf, *outbuf_end; | |
| PyObject *excobj, *outobj; | |
| } MultibyteEncodeBuffer; | |
| typedef struct { | |
| const unsigned char *inbuf, *inbuf_top, *inbuf_end; | |
| Py_UNICODE *outbuf, *outbuf_end; | |
| PyObject *excobj, *outobj; | |
| } MultibyteDecodeBuffer; | |
| PyDoc_STRVAR(MultibyteCodec_Encode__doc__, | |
| "I.encode(unicode[, errors]) -> (string, length consumed)\n\ | |
| \n\ | |
| Return an encoded string version of `unicode'. errors may be given to\n\ | |
| set a different error handling scheme. Default is 'strict' meaning that\n\ | |
| encoding errors raise a UnicodeEncodeError. Other possible values are\n\ | |
| 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\ | |
| registered with codecs.register_error that can handle UnicodeEncodeErrors."); | |
| PyDoc_STRVAR(MultibyteCodec_Decode__doc__, | |
| "I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\ | |
| \n\ | |
| Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\ | |
| to set a different error handling scheme. Default is 'strict' meaning\n\ | |
| that encoding errors raise a UnicodeDecodeError. Other possible values\n\ | |
| are 'ignore' and 'replace' as well as any other name registered with\n\ | |
| codecs.register_error that is able to handle UnicodeDecodeErrors."); | |
| static char *codeckwarglist[] = {"input", "errors", NULL}; | |
| static char *incnewkwarglist[] = {"errors", NULL}; | |
| static char *incrementalkwarglist[] = {"input", "final", NULL}; | |
| static char *streamkwarglist[] = {"stream", "errors", NULL}; | |
| static PyObject *multibytecodec_encode(MultibyteCodec *, | |
| MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t, | |
| PyObject *, int); | |
/* reset after an encoding session; parenthesized so the shift cannot be
 * re-associated by operators at the expansion site */
#define MBENC_RESET     (MBENC_MAX << 1)
| static PyObject * | |
| make_tuple(PyObject *object, Py_ssize_t len) | |
| { | |
| PyObject *v, *w; | |
| if (object == NULL) | |
| return NULL; | |
| v = PyTuple_New(2); | |
| if (v == NULL) { | |
| Py_DECREF(object); | |
| return NULL; | |
| } | |
| PyTuple_SET_ITEM(v, 0, object); | |
| w = PyInt_FromSsize_t(len); | |
| if (w == NULL) { | |
| Py_DECREF(v); | |
| return NULL; | |
| } | |
| PyTuple_SET_ITEM(v, 1, w); | |
| return v; | |
| } | |
| static PyObject * | |
| internal_error_callback(const char *errors) | |
| { | |
| if (errors == NULL || strcmp(errors, "strict") == 0) | |
| return ERROR_STRICT; | |
| else if (strcmp(errors, "ignore") == 0) | |
| return ERROR_IGNORE; | |
| else if (strcmp(errors, "replace") == 0) | |
| return ERROR_REPLACE; | |
| else | |
| return PyString_FromString(errors); | |
| } | |
| static PyObject * | |
| call_error_callback(PyObject *errors, PyObject *exc) | |
| { | |
| PyObject *args, *cb, *r; | |
| assert(PyString_Check(errors)); | |
| cb = PyCodec_LookupError(PyString_AS_STRING(errors)); | |
| if (cb == NULL) | |
| return NULL; | |
| args = PyTuple_New(1); | |
| if (args == NULL) { | |
| Py_DECREF(cb); | |
| return NULL; | |
| } | |
| PyTuple_SET_ITEM(args, 0, exc); | |
| Py_INCREF(exc); | |
| r = PyObject_CallObject(cb, args); | |
| Py_DECREF(args); | |
| Py_DECREF(cb); | |
| return r; | |
| } | |
| static PyObject * | |
| codecctx_errors_get(MultibyteStatefulCodecContext *self) | |
| { | |
| const char *errors; | |
| if (self->errors == ERROR_STRICT) | |
| errors = "strict"; | |
| else if (self->errors == ERROR_IGNORE) | |
| errors = "ignore"; | |
| else if (self->errors == ERROR_REPLACE) | |
| errors = "replace"; | |
| else { | |
| Py_INCREF(self->errors); | |
| return self->errors; | |
| } | |
| return PyString_FromString(errors); | |
| } | |
| static int | |
| codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value, | |
| void *closure) | |
| { | |
| PyObject *cb; | |
| if (!PyString_Check(value)) { | |
| PyErr_SetString(PyExc_TypeError, "errors must be a string"); | |
| return -1; | |
| } | |
| cb = internal_error_callback(PyString_AS_STRING(value)); | |
| if (cb == NULL) | |
| return -1; | |
| ERROR_DECREF(self->errors); | |
| self->errors = cb; | |
| return 0; | |
| } | |
| /* This getset handlers list is used by all the stateful codec objects */ | |
| static PyGetSetDef codecctx_getsets[] = { | |
| {"errors", (getter)codecctx_errors_get, | |
| (setter)codecctx_errors_set, | |
| PyDoc_STR("how to treat errors")}, | |
| {NULL,} | |
| }; | |
| static int | |
| expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) | |
| { | |
| Py_ssize_t orgpos, orgsize, incsize; | |
| orgpos = (Py_ssize_t)((char *)buf->outbuf - | |
| PyString_AS_STRING(buf->outobj)); | |
| orgsize = PyString_GET_SIZE(buf->outobj); | |
| incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize); | |
| if (orgsize > PY_SSIZE_T_MAX - incsize) | |
| return -1; | |
| if (_PyString_Resize(&buf->outobj, orgsize + incsize) == -1) | |
| return -1; | |
| buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos; | |
| buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj) | |
| + PyString_GET_SIZE(buf->outobj); | |
| return 0; | |
| } | |
/* Ensure at least `s` bytes of output room; s < 1 forces a growth step.
 * Jumps to the caller's `errorexit` label if the buffer cannot grow.
 * Wrapped in do/while(0) so it behaves as a single statement (safe inside
 * an unbraced if/else). */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) {           \
        if (expand_encodebuffer((buf), (s)) == -1)                      \
            goto errorexit;                                             \
    }                                                                   \
} while (0)
| static int | |
| expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) | |
| { | |
| Py_ssize_t orgpos, orgsize; | |
| orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); | |
| orgsize = PyUnicode_GET_SIZE(buf->outobj); | |
| if (PyUnicode_Resize(&buf->outobj, orgsize + ( | |
| esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) | |
| return -1; | |
| buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos; | |
| buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj) | |
| + PyUnicode_GET_SIZE(buf->outobj); | |
| return 0; | |
| } | |
/* Ensure at least `s` units of output room; s < 1 forces a growth step.
 * Jumps to the caller's `errorexit` label if the buffer cannot grow.
 * Wrapped in do/while(0) so it behaves as a single statement (safe inside
 * an unbraced if/else). */
#define REQUIRE_DECODEBUFFER(buf, s) do {                               \
    if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) {           \
        if (expand_decodebuffer((buf), (s)) == -1)                      \
            goto errorexit;                                             \
    }                                                                   \
} while (0)
/**
 * MultibyteCodec object
 */
| static int | |
| multibytecodec_encerror(MultibyteCodec *codec, | |
| MultibyteCodec_State *state, | |
| MultibyteEncodeBuffer *buf, | |
| PyObject *errors, Py_ssize_t e) | |
| { | |
| PyObject *retobj = NULL, *retstr = NULL, *tobj; | |
| Py_ssize_t retstrsize, newpos; | |
| Py_ssize_t esize, start, end; | |
| const char *reason; | |
| if (e > 0) { | |
| reason = "illegal multibyte sequence"; | |
| esize = e; | |
| } | |
| else { | |
| switch (e) { | |
| case MBERR_TOOSMALL: | |
| REQUIRE_ENCODEBUFFER(buf, -1); | |
| return 0; /* retry it */ | |
| case MBERR_TOOFEW: | |
| reason = "incomplete multibyte sequence"; | |
| esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); | |
| break; | |
| case MBERR_INTERNAL: | |
| PyErr_SetString(PyExc_RuntimeError, | |
| "internal codec error"); | |
| return -1; | |
| default: | |
| PyErr_SetString(PyExc_RuntimeError, | |
| "unknown runtime error"); | |
| return -1; | |
| } | |
| } | |
| if (errors == ERROR_REPLACE) { | |
| const Py_UNICODE replchar = '?', *inbuf = &replchar; | |
| Py_ssize_t r; | |
| for (;;) { | |
| Py_ssize_t outleft; | |
| outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); | |
| r = codec->encode(state, codec->config, &inbuf, 1, | |
| &buf->outbuf, outleft, 0); | |
| if (r == MBERR_TOOSMALL) { | |
| REQUIRE_ENCODEBUFFER(buf, -1); | |
| continue; | |
| } | |
| else | |
| break; | |
| } | |
| if (r != 0) { | |
| REQUIRE_ENCODEBUFFER(buf, 1); | |
| *buf->outbuf++ = '?'; | |
| } | |
| } | |
| if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { | |
| buf->inbuf += esize; | |
| return 0; | |
| } | |
| start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); | |
| end = start + esize; | |
| /* use cached exception object if available */ | |
| if (buf->excobj == NULL) { | |
| buf->excobj = PyUnicodeEncodeError_Create(codec->encoding, | |
| buf->inbuf_top, | |
| buf->inbuf_end - buf->inbuf_top, | |
| start, end, reason); | |
| if (buf->excobj == NULL) | |
| goto errorexit; | |
| } | |
| else | |
| if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 || | |
| PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || | |
| PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) | |
| goto errorexit; | |
| if (errors == ERROR_STRICT) { | |
| PyCodec_StrictErrors(buf->excobj); | |
| goto errorexit; | |
| } | |
| retobj = call_error_callback(errors, buf->excobj); | |
| if (retobj == NULL) | |
| goto errorexit; | |
| if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || | |
| !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) || | |
| !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) || | |
| PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) { | |
| PyErr_SetString(PyExc_TypeError, | |
| "encoding error handler must return " | |
| "(unicode, int) tuple"); | |
| goto errorexit; | |
| } | |
| { | |
| const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj); | |
| retstr = multibytecodec_encode(codec, state, &uraw, | |
| PyUnicode_GET_SIZE(tobj), ERROR_STRICT, | |
| MBENC_FLUSH); | |
| if (retstr == NULL) | |
| goto errorexit; | |
| } | |
| retstrsize = PyString_GET_SIZE(retstr); | |
| REQUIRE_ENCODEBUFFER(buf, retstrsize); | |
| memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize); | |
| buf->outbuf += retstrsize; | |
| newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); | |
| if (newpos < 0 && !PyErr_Occurred()) | |
| newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); | |
| if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { | |
| PyErr_Clear(); | |
| PyErr_Format(PyExc_IndexError, | |
| "position %zd from error handler out of bounds", | |
| newpos); | |
| goto errorexit; | |
| } | |
| buf->inbuf = buf->inbuf_top + newpos; | |
| Py_DECREF(retobj); | |
| Py_DECREF(retstr); | |
| return 0; | |
| errorexit: | |
| Py_XDECREF(retobj); | |
| Py_XDECREF(retstr); | |
| return -1; | |
| } | |
| static int | |
| multibytecodec_decerror(MultibyteCodec *codec, | |
| MultibyteCodec_State *state, | |
| MultibyteDecodeBuffer *buf, | |
| PyObject *errors, Py_ssize_t e) | |
| { | |
| PyObject *retobj = NULL, *retuni = NULL; | |
| Py_ssize_t retunisize, newpos; | |
| const char *reason; | |
| Py_ssize_t esize, start, end; | |
| if (e > 0) { | |
| reason = "illegal multibyte sequence"; | |
| esize = e; | |
| } | |
| else { | |
| switch (e) { | |
| case MBERR_TOOSMALL: | |
| REQUIRE_DECODEBUFFER(buf, -1); | |
| return 0; /* retry it */ | |
| case MBERR_TOOFEW: | |
| reason = "incomplete multibyte sequence"; | |
| esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); | |
| break; | |
| case MBERR_INTERNAL: | |
| PyErr_SetString(PyExc_RuntimeError, | |
| "internal codec error"); | |
| return -1; | |
| default: | |
| PyErr_SetString(PyExc_RuntimeError, | |
| "unknown runtime error"); | |
| return -1; | |
| } | |
| } | |
| if (errors == ERROR_REPLACE) { | |
| REQUIRE_DECODEBUFFER(buf, 1); | |
| *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER; | |
| } | |
| if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { | |
| buf->inbuf += esize; | |
| return 0; | |
| } | |
| start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); | |
| end = start + esize; | |
| /* use cached exception object if available */ | |
| if (buf->excobj == NULL) { | |
| buf->excobj = PyUnicodeDecodeError_Create(codec->encoding, | |
| (const char *)buf->inbuf_top, | |
| (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top), | |
| start, end, reason); | |
| if (buf->excobj == NULL) | |
| goto errorexit; | |
| } | |
| else | |
| if (PyUnicodeDecodeError_SetStart(buf->excobj, start) || | |
| PyUnicodeDecodeError_SetEnd(buf->excobj, end) || | |
| PyUnicodeDecodeError_SetReason(buf->excobj, reason)) | |
| goto errorexit; | |
| if (errors == ERROR_STRICT) { | |
| PyCodec_StrictErrors(buf->excobj); | |
| goto errorexit; | |
| } | |
| retobj = call_error_callback(errors, buf->excobj); | |
| if (retobj == NULL) | |
| goto errorexit; | |
| if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || | |
| !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || | |
| !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) || | |
| PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) { | |
| PyErr_SetString(PyExc_TypeError, | |
| "decoding error handler must return " | |
| "(unicode, int) tuple"); | |
| goto errorexit; | |
| } | |
| retunisize = PyUnicode_GET_SIZE(retuni); | |
| if (retunisize > 0) { | |
| REQUIRE_DECODEBUFFER(buf, retunisize); | |
| memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni), | |
| retunisize * Py_UNICODE_SIZE); | |
| buf->outbuf += retunisize; | |
| } | |
| newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); | |
| if (newpos < 0 && !PyErr_Occurred()) | |
| newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); | |
| if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { | |
| PyErr_Clear(); | |
| PyErr_Format(PyExc_IndexError, | |
| "position %zd from error handler out of bounds", | |
| newpos); | |
| goto errorexit; | |
| } | |
| buf->inbuf = buf->inbuf_top + newpos; | |
| Py_DECREF(retobj); | |
| return 0; | |
| errorexit: | |
| Py_XDECREF(retobj); | |
| return -1; | |
| } | |
| static PyObject * | |
| multibytecodec_encode(MultibyteCodec *codec, | |
| MultibyteCodec_State *state, | |
| const Py_UNICODE **data, Py_ssize_t datalen, | |
| PyObject *errors, int flags) | |
| { | |
| MultibyteEncodeBuffer buf; | |
| Py_ssize_t finalsize, r = 0; | |
| if (datalen == 0 && !(flags & MBENC_RESET)) | |
| return PyString_FromString(""); | |
| buf.excobj = NULL; | |
| buf.inbuf = buf.inbuf_top = *data; | |
| buf.inbuf_end = buf.inbuf_top + datalen; | |
| if (datalen > (PY_SSIZE_T_MAX - 16) / 2) { | |
| PyErr_NoMemory(); | |
| goto errorexit; | |
| } | |
| buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16); | |
| if (buf.outobj == NULL) | |
| goto errorexit; | |
| buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj); | |
| buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj); | |
| while (buf.inbuf < buf.inbuf_end) { | |
| Py_ssize_t inleft, outleft; | |
| /* we don't reuse inleft and outleft here. | |
| * error callbacks can relocate the cursor anywhere on buffer*/ | |
| inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); | |
| outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); | |
| r = codec->encode(state, codec->config, &buf.inbuf, inleft, | |
| &buf.outbuf, outleft, flags); | |
| if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH))) | |
| break; | |
| else if (multibytecodec_encerror(codec, state, &buf, errors,r)) | |
| goto errorexit; | |
| else if (r == MBERR_TOOFEW) | |
| break; | |
| } | |
| if (codec->encreset != NULL && (flags & MBENC_RESET)) | |
| for (;;) { | |
| Py_ssize_t outleft; | |
| outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); | |
| r = codec->encreset(state, codec->config, &buf.outbuf, | |
| outleft); | |
| if (r == 0) | |
| break; | |
| else if (multibytecodec_encerror(codec, state, | |
| &buf, errors, r)) | |
| goto errorexit; | |
| } | |
| finalsize = (Py_ssize_t)((char *)buf.outbuf - | |
| PyString_AS_STRING(buf.outobj)); | |
| if (finalsize != PyString_GET_SIZE(buf.outobj)) | |
| if (_PyString_Resize(&buf.outobj, finalsize) == -1) | |
| goto errorexit; | |
| *data = buf.inbuf; | |
| Py_XDECREF(buf.excobj); | |
| return buf.outobj; | |
| errorexit: | |
| Py_XDECREF(buf.excobj); | |
| Py_XDECREF(buf.outobj); | |
| return NULL; | |
| } | |
| static PyObject * | |
| MultibyteCodec_Encode(MultibyteCodecObject *self, | |
| PyObject *args, PyObject *kwargs) | |
| { | |
| MultibyteCodec_State state; | |
| Py_UNICODE *data; | |
| PyObject *errorcb, *r, *arg, *ucvt; | |
| const char *errors = NULL; | |
| Py_ssize_t datalen; | |
| if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode", | |
| codeckwarglist, &arg, &errors)) | |
| return NULL; | |
| if (PyUnicode_Check(arg)) | |
| ucvt = NULL; | |
| else { | |
| arg = ucvt = PyObject_Unicode(arg); | |
| if (arg == NULL) | |
| return NULL; | |
| else if (!PyUnicode_Check(arg)) { | |
| PyErr_SetString(PyExc_TypeError, | |
| "couldn't convert the object to unicode."); | |
| Py_DECREF(ucvt); | |
| return NULL; | |
| } | |
| } | |
| data = PyUnicode_AS_UNICODE(arg); | |
| datalen = PyUnicode_GET_SIZE(arg); | |
| errorcb = internal_error_callback(errors); | |
| if (errorcb == NULL) { | |
| Py_XDECREF(ucvt); | |
| return NULL; | |
| } | |
| if (self->codec->encinit != NULL && | |
| self->codec->encinit(&state, self->codec->config) != 0) | |
| goto errorexit; | |
| r = multibytecodec_encode(self->codec, &state, | |
| (const Py_UNICODE **)&data, datalen, errorcb, | |
| MBENC_FLUSH | MBENC_RESET); | |
| if (r == NULL) | |
| goto errorexit; | |
| ERROR_DECREF(errorcb); | |
| Py_XDECREF(ucvt); | |
| return make_tuple(r, datalen); | |
| errorexit: | |
| ERROR_DECREF(errorcb); | |
| Py_XDECREF(ucvt); | |
| return NULL; | |
| } | |
| static PyObject * | |
| MultibyteCodec_Decode(MultibyteCodecObject *self, | |
| PyObject *args, PyObject *kwargs) | |
| { | |
| MultibyteCodec_State state; | |
| MultibyteDecodeBuffer buf; | |
| PyObject *errorcb; | |
| Py_buffer pdata; | |
| const char *data, *errors = NULL; | |
| Py_ssize_t datalen, finalsize; | |
| if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|z:decode", | |
| codeckwarglist, &pdata, &errors)) | |
| return NULL; | |
| data = pdata.buf; | |
| datalen = pdata.len; | |
| errorcb = internal_error_callback(errors); | |
| if (errorcb == NULL) { | |
| PyBuffer_Release(&pdata); | |
| return NULL; | |
| } | |
| if (datalen == 0) { | |
| PyBuffer_Release(&pdata); | |
| ERROR_DECREF(errorcb); | |
| return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0); | |
| } | |
| buf.excobj = NULL; | |
| buf.inbuf = buf.inbuf_top = (unsigned char *)data; | |
| buf.inbuf_end = buf.inbuf_top + datalen; | |
| buf.outobj = PyUnicode_FromUnicode(NULL, datalen); | |
| if (buf.outobj == NULL) | |
| goto errorexit; | |
| buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); | |
| buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); | |
| if (self->codec->decinit != NULL && | |
| self->codec->decinit(&state, self->codec->config) != 0) | |
| goto errorexit; | |
| while (buf.inbuf < buf.inbuf_end) { | |
| Py_ssize_t inleft, outleft, r; | |
| inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); | |
| outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); | |
| r = self->codec->decode(&state, self->codec->config, | |
| &buf.inbuf, inleft, &buf.outbuf, outleft); | |
| if (r == 0) | |
| break; | |
| else if (multibytecodec_decerror(self->codec, &state, | |
| &buf, errorcb, r)) | |
| goto errorexit; | |
| } | |
| finalsize = (Py_ssize_t)(buf.outbuf - | |
| PyUnicode_AS_UNICODE(buf.outobj)); | |
| if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) | |
| if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) | |
| goto errorexit; | |
| PyBuffer_Release(&pdata); | |
| Py_XDECREF(buf.excobj); | |
| ERROR_DECREF(errorcb); | |
| return make_tuple(buf.outobj, datalen); | |
| errorexit: | |
| PyBuffer_Release(&pdata); | |
| ERROR_DECREF(errorcb); | |
| Py_XDECREF(buf.excobj); | |
| Py_XDECREF(buf.outobj); | |
| return NULL; | |
| } | |
| static struct PyMethodDef multibytecodec_methods[] = { | |
| {"encode", (PyCFunction)MultibyteCodec_Encode, | |
| METH_VARARGS | METH_KEYWORDS, | |
| MultibyteCodec_Encode__doc__}, | |
| {"decode", (PyCFunction)MultibyteCodec_Decode, | |
| METH_VARARGS | METH_KEYWORDS, | |
| MultibyteCodec_Decode__doc__}, | |
| {NULL, NULL}, | |
| }; | |
| static void | |
| multibytecodec_dealloc(MultibyteCodecObject *self) | |
| { | |
| PyObject_Del(self); | |
| } | |
| static PyTypeObject MultibyteCodec_Type = { | |
| PyVarObject_HEAD_INIT(NULL, 0) | |
| "MultibyteCodec", /* tp_name */ | |
| sizeof(MultibyteCodecObject), /* tp_basicsize */ | |
| 0, /* tp_itemsize */ | |
| /* methods */ | |
| (destructor)multibytecodec_dealloc, /* tp_dealloc */ | |
| 0, /* tp_print */ | |
| 0, /* tp_getattr */ | |
| 0, /* tp_setattr */ | |
| 0, /* tp_compare */ | |
| 0, /* tp_repr */ | |
| 0, /* tp_as_number */ | |
| 0, /* tp_as_sequence */ | |
| 0, /* tp_as_mapping */ | |
| 0, /* tp_hash */ | |
| 0, /* tp_call */ | |
| 0, /* tp_str */ | |
| PyObject_GenericGetAttr, /* tp_getattro */ | |
| 0, /* tp_setattro */ | |
| 0, /* tp_as_buffer */ | |
| Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
| 0, /* tp_doc */ | |
| 0, /* tp_traverse */ | |
| 0, /* tp_clear */ | |
| 0, /* tp_richcompare */ | |
| 0, /* tp_weaklistoffset */ | |
| 0, /* tp_iter */ | |
| 0, /* tp_iterext */ | |
| multibytecodec_methods, /* tp_methods */ | |
| }; | |
/**
 * Utility functions for stateful codec mechanism
 */
/* Cast an object pointer to the stateful decoder / encoder context type. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
| static PyObject * | |
| encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, | |
| PyObject *unistr, int final) | |
| { | |
| PyObject *ucvt, *r = NULL; | |
| Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; | |
| Py_ssize_t datalen, origpending; | |
| if (PyUnicode_Check(unistr)) | |
| ucvt = NULL; | |
| else { | |
| unistr = ucvt = PyObject_Unicode(unistr); | |
| if (unistr == NULL) | |
| return NULL; | |
| else if (!PyUnicode_Check(unistr)) { | |
| PyErr_SetString(PyExc_TypeError, | |
| "couldn't convert the object to unicode."); | |
| Py_DECREF(ucvt); | |
| return NULL; | |
| } | |
| } | |
| datalen = PyUnicode_GET_SIZE(unistr); | |
| origpending = ctx->pendingsize; | |
| if (origpending > 0) { | |
| if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) { | |
| PyErr_NoMemory(); | |
| /* inbuf_tmp == NULL */ | |
| goto errorexit; | |
| } | |
| inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize); | |
| if (inbuf_tmp == NULL) | |
| goto errorexit; | |
| memcpy(inbuf_tmp, ctx->pending, | |
| Py_UNICODE_SIZE * ctx->pendingsize); | |
| memcpy(inbuf_tmp + ctx->pendingsize, | |
| PyUnicode_AS_UNICODE(unistr), | |
| Py_UNICODE_SIZE * datalen); | |
| datalen += ctx->pendingsize; | |
| ctx->pendingsize = 0; | |
| inbuf = inbuf_tmp; | |
| } | |
| else | |
| inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr); | |
| inbuf_end = inbuf + datalen; | |
| r = multibytecodec_encode(ctx->codec, &ctx->state, | |
| (const Py_UNICODE **)&inbuf, datalen, | |
| ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0); | |
| if (r == NULL) { | |
| /* recover the original pending buffer */ | |
| if (origpending > 0) | |
| memcpy(ctx->pending, inbuf_tmp, | |
| Py_UNICODE_SIZE * origpending); | |
| ctx->pendingsize = origpending; | |
| goto errorexit; | |
| } | |
| if (inbuf < inbuf_end) { | |
| ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf); | |
| if (ctx->pendingsize > MAXENCPENDING) { | |
| /* normal codecs can't reach here */ | |
| ctx->pendingsize = 0; | |
| PyErr_SetString(PyExc_UnicodeError, | |
| "pending buffer overflow"); | |
| goto errorexit; | |
| } | |
| memcpy(ctx->pending, inbuf, | |
| ctx->pendingsize * Py_UNICODE_SIZE); | |
| } | |
| if (inbuf_tmp != NULL) | |
| PyMem_Del(inbuf_tmp); | |
| Py_XDECREF(ucvt); | |
| return r; | |
| errorexit: | |
| if (inbuf_tmp != NULL) | |
| PyMem_Del(inbuf_tmp); | |
| Py_XDECREF(r); | |
| Py_XDECREF(ucvt); | |
| return NULL; | |
| } | |
| static int | |
| decoder_append_pending(MultibyteStatefulDecoderContext *ctx, | |
| MultibyteDecodeBuffer *buf) | |
| { | |
| Py_ssize_t npendings; | |
| npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); | |
| if (npendings + ctx->pendingsize > MAXDECPENDING || | |
| npendings > PY_SSIZE_T_MAX - ctx->pendingsize) { | |
| PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); | |
| return -1; | |
| } | |
| memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings); | |
| ctx->pendingsize += npendings; | |
| return 0; | |
| } | |
| static int | |
| decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data, | |
| Py_ssize_t size) | |
| { | |
| buf->inbuf = buf->inbuf_top = (const unsigned char *)data; | |
| buf->inbuf_end = buf->inbuf_top + size; | |
| if (buf->outobj == NULL) { /* only if outobj is not allocated yet */ | |
| buf->outobj = PyUnicode_FromUnicode(NULL, size); | |
| if (buf->outobj == NULL) | |
| return -1; | |
| buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj); | |
| buf->outbuf_end = buf->outbuf + | |
| PyUnicode_GET_SIZE(buf->outobj); | |
| } | |
| return 0; | |
| } | |
| static int | |
| decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx, | |
| MultibyteDecodeBuffer *buf) | |
| { | |
| while (buf->inbuf < buf->inbuf_end) { | |
| Py_ssize_t inleft, outleft; | |
| Py_ssize_t r; | |
| inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); | |
| outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); | |
| r = ctx->codec->decode(&ctx->state, ctx->codec->config, | |
| &buf->inbuf, inleft, &buf->outbuf, outleft); | |
| if (r == 0 || r == MBERR_TOOFEW) | |
| break; | |
| else if (multibytecodec_decerror(ctx->codec, &ctx->state, | |
| buf, ctx->errors, r)) | |
| return -1; | |
| } | |
| return 0; | |
| } | |
/**
 * MultibyteIncrementalEncoder object
 */
| static PyObject * | |
| mbiencoder_encode(MultibyteIncrementalEncoderObject *self, | |
| PyObject *args, PyObject *kwargs) | |
| { | |
| PyObject *data; | |
| int final = 0; | |
| if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode", | |
| incrementalkwarglist, &data, &final)) | |
| return NULL; | |
| return encoder_encode_stateful(STATEFUL_ECTX(self), data, final); | |
| } | |
| static PyObject * | |
| mbiencoder_reset(MultibyteIncrementalEncoderObject *self) | |
| { | |
| if (self->codec->decreset != NULL && | |
| self->codec->decreset(&self->state, self->codec->config) != 0) | |
| return NULL; | |
| self->pendingsize = 0; | |
| Py_RETURN_NONE; | |
| } | |
| static struct PyMethodDef mbiencoder_methods[] = { | |
| {"encode", (PyCFunction)mbiencoder_encode, | |
| METH_VARARGS | METH_KEYWORDS, NULL}, | |
| {"reset", (PyCFunction)mbiencoder_reset, | |
| METH_NOARGS, NULL}, | |
| {NULL, NULL}, | |
| }; | |
| static PyObject * | |
| mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | |
| { | |
| MultibyteIncrementalEncoderObject *self; | |
| PyObject *codec = NULL; | |
| char *errors = NULL; | |
| if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder", | |
| incnewkwarglist, &errors)) | |
| return NULL; | |
| self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0); | |
| if (self == NULL) | |
| return NULL; | |
| codec = PyObject_GetAttrString((PyObject *)type, "codec"); | |
| if (codec == NULL) | |
| goto errorexit; | |
| if (!MultibyteCodec_Check(codec)) { | |
| PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); | |
| goto errorexit; | |
| } | |
| self->codec = ((MultibyteCodecObject *)codec)->codec; | |
| self->pendingsize = 0; | |
| self->errors = internal_error_callback(errors); | |
| if (self->errors == NULL) | |
| goto errorexit; | |
| if (self->codec->encinit != NULL && | |
| self->codec->encinit(&self->state, self->codec->config) != 0) | |
| goto errorexit; | |
| Py_DECREF(codec); | |
| return (PyObject *)self; | |
| errorexit: | |
| Py_XDECREF(self); | |
| Py_XDECREF(codec); | |
| return NULL; | |
| } | |
| static int | |
| mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds) | |
| { | |
| return 0; | |
| } | |
| static int | |
| mbiencoder_traverse(MultibyteIncrementalEncoderObject *self, | |
| visitproc visit, void *arg) | |
| { | |
| if (ERROR_ISCUSTOM(self->errors)) | |
| Py_VISIT(self->errors); | |
| return 0; | |
| } | |
| static void | |
| mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self) | |
| { | |
| PyObject_GC_UnTrack(self); | |
| ERROR_DECREF(self->errors); | |
| Py_TYPE(self)->tp_free(self); | |
| } | |
| static PyTypeObject MultibyteIncrementalEncoder_Type = { | |
| PyVarObject_HEAD_INIT(NULL, 0) | |
| "MultibyteIncrementalEncoder", /* tp_name */ | |
| sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */ | |
| 0, /* tp_itemsize */ | |
| /* methods */ | |
| (destructor)mbiencoder_dealloc, /* tp_dealloc */ | |
| 0, /* tp_print */ | |
| 0, /* tp_getattr */ | |
| 0, /* tp_setattr */ | |
| 0, /* tp_compare */ | |
| 0, /* tp_repr */ | |
| 0, /* tp_as_number */ | |
| 0, /* tp_as_sequence */ | |
| 0, /* tp_as_mapping */ | |
| 0, /* tp_hash */ | |
| 0, /* tp_call */ | |
| 0, /* tp_str */ | |
| PyObject_GenericGetAttr, /* tp_getattro */ | |
| 0, /* tp_setattro */ | |
| 0, /* tp_as_buffer */ | |
| Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | |
| | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
| 0, /* tp_doc */ | |
| (traverseproc)mbiencoder_traverse, /* tp_traverse */ | |
| 0, /* tp_clear */ | |
| 0, /* tp_richcompare */ | |
| 0, /* tp_weaklistoffset */ | |
| 0, /* tp_iter */ | |
| 0, /* tp_iterext */ | |
| mbiencoder_methods, /* tp_methods */ | |
| 0, /* tp_members */ | |
| codecctx_getsets, /* tp_getset */ | |
| 0, /* tp_base */ | |
| 0, /* tp_dict */ | |
| 0, /* tp_descr_get */ | |
| 0, /* tp_descr_set */ | |
| 0, /* tp_dictoffset */ | |
| mbiencoder_init, /* tp_init */ | |
| 0, /* tp_alloc */ | |
| mbiencoder_new, /* tp_new */ | |
| }; | |
/**
 * MultibyteIncrementalDecoder object
 */
| static PyObject * | |
| mbidecoder_decode(MultibyteIncrementalDecoderObject *self, | |
| PyObject *args, PyObject *kwargs) | |
| { | |
| MultibyteDecodeBuffer buf; | |
| char *data, *wdata = NULL; | |
| Py_buffer pdata; | |
| Py_ssize_t wsize, finalsize = 0, size, origpending; | |
| int final = 0; | |
| if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i:decode", | |
| incrementalkwarglist, &pdata, &final)) | |
| return NULL; | |
| data = pdata.buf; | |
| size = pdata.len; | |
| buf.outobj = buf.excobj = NULL; | |
| origpending = self->pendingsize; | |
| if (self->pendingsize == 0) { | |
| wsize = size; | |
| wdata = data; | |
| } | |
| else { | |
| if (size > PY_SSIZE_T_MAX - self->pendingsize) { | |
| PyErr_NoMemory(); | |
| goto errorexit; | |
| } | |
| wsize = size + self->pendingsize; | |
| wdata = PyMem_Malloc(wsize); | |
| if (wdata == NULL) | |
| goto errorexit; | |
| memcpy(wdata, self->pending, self->pendingsize); | |
| memcpy(wdata + self->pendingsize, data, size); | |
| self->pendingsize = 0; | |
| } | |
| if (decoder_prepare_buffer(&buf, wdata, wsize) != 0) | |
| goto errorexit; | |
| if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf)) | |
| goto errorexit; | |
| if (final && buf.inbuf < buf.inbuf_end) { | |
| if (multibytecodec_decerror(self->codec, &self->state, | |
| &buf, self->errors, MBERR_TOOFEW)) { | |
| /* recover the original pending buffer */ | |
| memcpy(self->pending, wdata, origpending); | |
| self->pendingsize = origpending; | |
| goto errorexit; | |
| } | |
| } | |
| if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */ | |
| if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0) | |
| goto errorexit; | |
| } | |
| finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); | |
| if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) | |
| if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) | |
| goto errorexit; | |
| PyBuffer_Release(&pdata); | |
| if (wdata != data) | |
| PyMem_Del(wdata); | |
| Py_XDECREF(buf.excobj); | |
| return buf.outobj; | |
| errorexit: | |
| PyBuffer_Release(&pdata); | |
| if (wdata != NULL && wdata != data) | |
| PyMem_Del(wdata); | |
| Py_XDECREF(buf.excobj); | |
| Py_XDECREF(buf.outobj); | |
| return NULL; | |
| } | |
| static PyObject * | |
| mbidecoder_reset(MultibyteIncrementalDecoderObject *self) | |
| { | |
| if (self->codec->decreset != NULL && | |
| self->codec->decreset(&self->state, self->codec->config) != 0) | |
| return NULL; | |
| self->pendingsize = 0; | |
| Py_RETURN_NONE; | |
| } | |
| static struct PyMethodDef mbidecoder_methods[] = { | |
| {"decode", (PyCFunction)mbidecoder_decode, | |
| METH_VARARGS | METH_KEYWORDS, NULL}, | |
| {"reset", (PyCFunction)mbidecoder_reset, | |
| METH_NOARGS, NULL}, | |
| {NULL, NULL}, | |
| }; | |
| static PyObject * | |
| mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | |
| { | |
| MultibyteIncrementalDecoderObject *self; | |
| PyObject *codec = NULL; | |
| char *errors = NULL; | |
| if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder", | |
| incnewkwarglist, &errors)) | |
| return NULL; | |
| self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0); | |
| if (self == NULL) | |
| return NULL; | |
| codec = PyObject_GetAttrString((PyObject *)type, "codec"); | |
| if (codec == NULL) | |
| goto errorexit; | |
| if (!MultibyteCodec_Check(codec)) { | |
| PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); | |
| goto errorexit; | |
| } | |
| self->codec = ((MultibyteCodecObject *)codec)->codec; | |
| self->pendingsize = 0; | |
| self->errors = internal_error_callback(errors); | |
| if (self->errors == NULL) | |
| goto errorexit; | |
| if (self->codec->decinit != NULL && | |
| self->codec->decinit(&self->state, self->codec->config) != 0) | |
| goto errorexit; | |
| Py_DECREF(codec); | |
| return (PyObject *)self; | |
| errorexit: | |
| Py_XDECREF(self); | |
| Py_XDECREF(codec); | |
| return NULL; | |
| } | |
| static int | |
| mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds) | |
| { | |
| return 0; | |
| } | |
| static int | |
| mbidecoder_traverse(MultibyteIncrementalDecoderObject *self, | |
| visitproc visit, void *arg) | |
| { | |
| if (ERROR_ISCUSTOM(self->errors)) | |
| Py_VISIT(self->errors); | |
| return 0; | |
| } | |
| static void | |
| mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self) | |
| { | |
| PyObject_GC_UnTrack(self); | |
| ERROR_DECREF(self->errors); | |
| Py_TYPE(self)->tp_free(self); | |
| } | |
| static PyTypeObject MultibyteIncrementalDecoder_Type = { | |
| PyVarObject_HEAD_INIT(NULL, 0) | |
| "MultibyteIncrementalDecoder", /* tp_name */ | |
| sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */ | |
| 0, /* tp_itemsize */ | |
| /* methods */ | |
| (destructor)mbidecoder_dealloc, /* tp_dealloc */ | |
| 0, /* tp_print */ | |
| 0, /* tp_getattr */ | |
| 0, /* tp_setattr */ | |
| 0, /* tp_compare */ | |
| 0, /* tp_repr */ | |
| 0, /* tp_as_number */ | |
| 0, /* tp_as_sequence */ | |
| 0, /* tp_as_mapping */ | |
| 0, /* tp_hash */ | |
| 0, /* tp_call */ | |
| 0, /* tp_str */ | |
| PyObject_GenericGetAttr, /* tp_getattro */ | |
| 0, /* tp_setattro */ | |
| 0, /* tp_as_buffer */ | |
| Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | |
| | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
| 0, /* tp_doc */ | |
| (traverseproc)mbidecoder_traverse, /* tp_traverse */ | |
| 0, /* tp_clear */ | |
| 0, /* tp_richcompare */ | |
| 0, /* tp_weaklistoffset */ | |
| 0, /* tp_iter */ | |
| 0, /* tp_iterext */ | |
| mbidecoder_methods, /* tp_methods */ | |
| 0, /* tp_members */ | |
| codecctx_getsets, /* tp_getset */ | |
| 0, /* tp_base */ | |
| 0, /* tp_dict */ | |
| 0, /* tp_descr_get */ | |
| 0, /* tp_descr_set */ | |
| 0, /* tp_dictoffset */ | |
| mbidecoder_init, /* tp_init */ | |
| 0, /* tp_alloc */ | |
| mbidecoder_new, /* tp_new */ | |
| }; | |
| /** | |
| * MultibyteStreamReader object | |
| */ | |
| static PyObject * | |
| mbstreamreader_iread(MultibyteStreamReaderObject *self, | |
| const char *method, Py_ssize_t sizehint) | |
| { | |
| MultibyteDecodeBuffer buf; | |
| PyObject *cres; | |
| Py_ssize_t rsize, finalsize = 0; | |
| if (sizehint == 0) | |
| return PyUnicode_FromUnicode(NULL, 0); | |
| buf.outobj = buf.excobj = NULL; | |
| cres = NULL; | |
| for (;;) { | |
| int endoffile; | |
| if (sizehint < 0) | |
| cres = PyObject_CallMethod(self->stream, | |
| (char *)method, NULL); | |
| else | |
| cres = PyObject_CallMethod(self->stream, | |
| (char *)method, "i", sizehint); | |
| if (cres == NULL) | |
| goto errorexit; | |
| if (!PyString_Check(cres)) { | |
| PyErr_SetString(PyExc_TypeError, | |
| "stream function returned a " | |
| "non-string object"); | |
| goto errorexit; | |
| } | |
| endoffile = (PyString_GET_SIZE(cres) == 0); | |
| if (self->pendingsize > 0) { | |
| PyObject *ctr; | |
| char *ctrdata; | |
| if (PyString_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) { | |
| PyErr_NoMemory(); | |
| goto errorexit; | |
| } | |
| rsize = PyString_GET_SIZE(cres) + self->pendingsize; | |
| ctr = PyString_FromStringAndSize(NULL, rsize); | |
| if (ctr == NULL) | |
| goto errorexit; | |
| ctrdata = PyString_AS_STRING(ctr); | |
| memcpy(ctrdata, self->pending, self->pendingsize); | |
| memcpy(ctrdata + self->pendingsize, | |
| PyString_AS_STRING(cres), | |
| PyString_GET_SIZE(cres)); | |
| Py_DECREF(cres); | |
| cres = ctr; | |
| self->pendingsize = 0; | |
| } | |
| rsize = PyString_GET_SIZE(cres); | |
| if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres), | |
| rsize) != 0) | |
| goto errorexit; | |
| if (rsize > 0 && decoder_feed_buffer( | |
| (MultibyteStatefulDecoderContext *)self, &buf)) | |
| goto errorexit; | |
| if (endoffile || sizehint < 0) { | |
| if (buf.inbuf < buf.inbuf_end && | |
| multibytecodec_decerror(self->codec, &self->state, | |
| &buf, self->errors, MBERR_TOOFEW)) | |
| goto errorexit; | |
| } | |
| if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ | |
| if (decoder_append_pending(STATEFUL_DCTX(self), | |
| &buf) != 0) | |
| goto errorexit; | |
| } | |
| finalsize = (Py_ssize_t)(buf.outbuf - | |
| PyUnicode_AS_UNICODE(buf.outobj)); | |
| Py_DECREF(cres); | |
| cres = NULL; | |
| if (sizehint < 0 || finalsize != 0 || rsize == 0) | |
| break; | |
| sizehint = 1; /* read 1 more byte and retry */ | |
| } | |
| if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) | |
| if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) | |
| goto errorexit; | |
| Py_XDECREF(cres); | |
| Py_XDECREF(buf.excobj); | |
| return buf.outobj; | |
| errorexit: | |
| Py_XDECREF(cres); | |
| Py_XDECREF(buf.excobj); | |
| Py_XDECREF(buf.outobj); | |
| return NULL; | |
| } | |
| static PyObject * | |
| mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args) | |
| { | |
| PyObject *sizeobj = NULL; | |
| Py_ssize_t size; | |
| if (!PyArg_UnpackTuple(args, "read", 0, 1, &sizeobj)) | |
| return NULL; | |
| if (sizeobj == Py_None || sizeobj == NULL) | |
| size = -1; | |
| else if (PyInt_Check(sizeobj)) | |
| size = PyInt_AsSsize_t(sizeobj); | |
| else { | |
| PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); | |
| return NULL; | |
| } | |
| return mbstreamreader_iread(self, "read", size); | |
| } | |
| static PyObject * | |
| mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args) | |
| { | |
| PyObject *sizeobj = NULL; | |
| Py_ssize_t size; | |
| if (!PyArg_UnpackTuple(args, "readline", 0, 1, &sizeobj)) | |
| return NULL; | |
| if (sizeobj == Py_None || sizeobj == NULL) | |
| size = -1; | |
| else if (PyInt_Check(sizeobj)) | |
| size = PyInt_AsSsize_t(sizeobj); | |
| else { | |
| PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); | |
| return NULL; | |
| } | |
| return mbstreamreader_iread(self, "readline", size); | |
| } | |
| static PyObject * | |
| mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args) | |
| { | |
| PyObject *sizehintobj = NULL, *r, *sr; | |
| Py_ssize_t sizehint; | |
| if (!PyArg_UnpackTuple(args, "readlines", 0, 1, &sizehintobj)) | |
| return NULL; | |
| if (sizehintobj == Py_None || sizehintobj == NULL) | |
| sizehint = -1; | |
| else if (PyInt_Check(sizehintobj)) | |
| sizehint = PyInt_AsSsize_t(sizehintobj); | |
| else { | |
| PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); | |
| return NULL; | |
| } | |
| r = mbstreamreader_iread(self, "read", sizehint); | |
| if (r == NULL) | |
| return NULL; | |
| sr = PyUnicode_Splitlines(r, 1); | |
| Py_DECREF(r); | |
| return sr; | |
| } | |
| static PyObject * | |
| mbstreamreader_reset(MultibyteStreamReaderObject *self) | |
| { | |
| if (self->codec->decreset != NULL && | |
| self->codec->decreset(&self->state, self->codec->config) != 0) | |
| return NULL; | |
| self->pendingsize = 0; | |
| Py_RETURN_NONE; | |
| } | |
| static struct PyMethodDef mbstreamreader_methods[] = { | |
| {"read", (PyCFunction)mbstreamreader_read, | |
| METH_VARARGS, NULL}, | |
| {"readline", (PyCFunction)mbstreamreader_readline, | |
| METH_VARARGS, NULL}, | |
| {"readlines", (PyCFunction)mbstreamreader_readlines, | |
| METH_VARARGS, NULL}, | |
| {"reset", (PyCFunction)mbstreamreader_reset, | |
| METH_NOARGS, NULL}, | |
| {NULL, NULL}, | |
| }; | |
| static PyMemberDef mbstreamreader_members[] = { | |
| {"stream", T_OBJECT, | |
| offsetof(MultibyteStreamReaderObject, stream), | |
| READONLY, NULL}, | |
| {NULL,} | |
| }; | |
| static PyObject * | |
| mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | |
| { | |
| MultibyteStreamReaderObject *self; | |
| PyObject *stream, *codec = NULL; | |
| char *errors = NULL; | |
| if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader", | |
| streamkwarglist, &stream, &errors)) | |
| return NULL; | |
| self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0); | |
| if (self == NULL) | |
| return NULL; | |
| codec = PyObject_GetAttrString((PyObject *)type, "codec"); | |
| if (codec == NULL) | |
| goto errorexit; | |
| if (!MultibyteCodec_Check(codec)) { | |
| PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); | |
| goto errorexit; | |
| } | |
| self->codec = ((MultibyteCodecObject *)codec)->codec; | |
| self->stream = stream; | |
| Py_INCREF(stream); | |
| self->pendingsize = 0; | |
| self->errors = internal_error_callback(errors); | |
| if (self->errors == NULL) | |
| goto errorexit; | |
| if (self->codec->decinit != NULL && | |
| self->codec->decinit(&self->state, self->codec->config) != 0) | |
| goto errorexit; | |
| Py_DECREF(codec); | |
| return (PyObject *)self; | |
| errorexit: | |
| Py_XDECREF(self); | |
| Py_XDECREF(codec); | |
| return NULL; | |
| } | |
| static int | |
| mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds) | |
| { | |
| return 0; | |
| } | |
| static int | |
| mbstreamreader_traverse(MultibyteStreamReaderObject *self, | |
| visitproc visit, void *arg) | |
| { | |
| if (ERROR_ISCUSTOM(self->errors)) | |
| Py_VISIT(self->errors); | |
| Py_VISIT(self->stream); | |
| return 0; | |
| } | |
| static void | |
| mbstreamreader_dealloc(MultibyteStreamReaderObject *self) | |
| { | |
| PyObject_GC_UnTrack(self); | |
| ERROR_DECREF(self->errors); | |
| Py_XDECREF(self->stream); | |
| Py_TYPE(self)->tp_free(self); | |
| } | |
| static PyTypeObject MultibyteStreamReader_Type = { | |
| PyVarObject_HEAD_INIT(NULL, 0) | |
| "MultibyteStreamReader", /* tp_name */ | |
| sizeof(MultibyteStreamReaderObject), /* tp_basicsize */ | |
| 0, /* tp_itemsize */ | |
| /* methods */ | |
| (destructor)mbstreamreader_dealloc, /* tp_dealloc */ | |
| 0, /* tp_print */ | |
| 0, /* tp_getattr */ | |
| 0, /* tp_setattr */ | |
| 0, /* tp_compare */ | |
| 0, /* tp_repr */ | |
| 0, /* tp_as_number */ | |
| 0, /* tp_as_sequence */ | |
| 0, /* tp_as_mapping */ | |
| 0, /* tp_hash */ | |
| 0, /* tp_call */ | |
| 0, /* tp_str */ | |
| PyObject_GenericGetAttr, /* tp_getattro */ | |
| 0, /* tp_setattro */ | |
| 0, /* tp_as_buffer */ | |
| Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | |
| | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
| 0, /* tp_doc */ | |
| (traverseproc)mbstreamreader_traverse, /* tp_traverse */ | |
| 0, /* tp_clear */ | |
| 0, /* tp_richcompare */ | |
| 0, /* tp_weaklistoffset */ | |
| 0, /* tp_iter */ | |
| 0, /* tp_iterext */ | |
| mbstreamreader_methods, /* tp_methods */ | |
| mbstreamreader_members, /* tp_members */ | |
| codecctx_getsets, /* tp_getset */ | |
| 0, /* tp_base */ | |
| 0, /* tp_dict */ | |
| 0, /* tp_descr_get */ | |
| 0, /* tp_descr_set */ | |
| 0, /* tp_dictoffset */ | |
| mbstreamreader_init, /* tp_init */ | |
| 0, /* tp_alloc */ | |
| mbstreamreader_new, /* tp_new */ | |
| }; | |
| /** | |
| * MultibyteStreamWriter object | |
| */ | |
| static int | |
| mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, | |
| PyObject *unistr) | |
| { | |
| PyObject *str, *wr; | |
| str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0); | |
| if (str == NULL) | |
| return -1; | |
| wr = PyObject_CallMethod(self->stream, "write", "O", str); | |
| Py_DECREF(str); | |
| if (wr == NULL) | |
| return -1; | |
| Py_DECREF(wr); | |
| return 0; | |
| } | |
| static PyObject * | |
| mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *strobj) | |
| { | |
| if (mbstreamwriter_iwrite(self, strobj)) | |
| return NULL; | |
| else | |
| Py_RETURN_NONE; | |
| } | |
| static PyObject * | |
| mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines) | |
| { | |
| PyObject *strobj; | |
| int i, r; | |
| if (!PySequence_Check(lines)) { | |
| PyErr_SetString(PyExc_TypeError, | |
| "arg must be a sequence object"); | |
| return NULL; | |
| } | |
| for (i = 0; i < PySequence_Length(lines); i++) { | |
| /* length can be changed even within this loop */ | |
| strobj = PySequence_GetItem(lines, i); | |
| if (strobj == NULL) | |
| return NULL; | |
| r = mbstreamwriter_iwrite(self, strobj); | |
| Py_DECREF(strobj); | |
| if (r == -1) | |
| return NULL; | |
| } | |
| Py_RETURN_NONE; | |
| } | |
| static PyObject * | |
| mbstreamwriter_reset(MultibyteStreamWriterObject *self) | |
| { | |
| const Py_UNICODE *pending; | |
| PyObject *pwrt; | |
| pending = self->pending; | |
| pwrt = multibytecodec_encode(self->codec, &self->state, | |
| &pending, self->pendingsize, self->errors, | |
| MBENC_FLUSH | MBENC_RESET); | |
| /* some pending buffer can be truncated when UnicodeEncodeError is | |
| * raised on 'strict' mode. but, 'reset' method is designed to | |
| * reset the pending buffer or states so failed string sequence | |
| * ought to be missed */ | |
| self->pendingsize = 0; | |
| if (pwrt == NULL) | |
| return NULL; | |
| if (PyString_Size(pwrt) > 0) { | |
| PyObject *wr; | |
| wr = PyObject_CallMethod(self->stream, "write", "O", pwrt); | |
| if (wr == NULL) { | |
| Py_DECREF(pwrt); | |
| return NULL; | |
| } | |
| } | |
| Py_DECREF(pwrt); | |
| Py_RETURN_NONE; | |
| } | |
| static PyObject * | |
| mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | |
| { | |
| MultibyteStreamWriterObject *self; | |
| PyObject *stream, *codec = NULL; | |
| char *errors = NULL; | |
| if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter", | |
| streamkwarglist, &stream, &errors)) | |
| return NULL; | |
| self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0); | |
| if (self == NULL) | |
| return NULL; | |
| codec = PyObject_GetAttrString((PyObject *)type, "codec"); | |
| if (codec == NULL) | |
| goto errorexit; | |
| if (!MultibyteCodec_Check(codec)) { | |
| PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); | |
| goto errorexit; | |
| } | |
| self->codec = ((MultibyteCodecObject *)codec)->codec; | |
| self->stream = stream; | |
| Py_INCREF(stream); | |
| self->pendingsize = 0; | |
| self->errors = internal_error_callback(errors); | |
| if (self->errors == NULL) | |
| goto errorexit; | |
| if (self->codec->encinit != NULL && | |
| self->codec->encinit(&self->state, self->codec->config) != 0) | |
| goto errorexit; | |
| Py_DECREF(codec); | |
| return (PyObject *)self; | |
| errorexit: | |
| Py_XDECREF(self); | |
| Py_XDECREF(codec); | |
| return NULL; | |
| } | |
| static int | |
| mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds) | |
| { | |
| return 0; | |
| } | |
| static int | |
| mbstreamwriter_traverse(MultibyteStreamWriterObject *self, | |
| visitproc visit, void *arg) | |
| { | |
| if (ERROR_ISCUSTOM(self->errors)) | |
| Py_VISIT(self->errors); | |
| Py_VISIT(self->stream); | |
| return 0; | |
| } | |
| static void | |
| mbstreamwriter_dealloc(MultibyteStreamWriterObject *self) | |
| { | |
| PyObject_GC_UnTrack(self); | |
| ERROR_DECREF(self->errors); | |
| Py_XDECREF(self->stream); | |
| Py_TYPE(self)->tp_free(self); | |
| } | |
| static struct PyMethodDef mbstreamwriter_methods[] = { | |
| {"write", (PyCFunction)mbstreamwriter_write, | |
| METH_O, NULL}, | |
| {"writelines", (PyCFunction)mbstreamwriter_writelines, | |
| METH_O, NULL}, | |
| {"reset", (PyCFunction)mbstreamwriter_reset, | |
| METH_NOARGS, NULL}, | |
| {NULL, NULL}, | |
| }; | |
| static PyMemberDef mbstreamwriter_members[] = { | |
| {"stream", T_OBJECT, | |
| offsetof(MultibyteStreamWriterObject, stream), | |
| READONLY, NULL}, | |
| {NULL,} | |
| }; | |
| static PyTypeObject MultibyteStreamWriter_Type = { | |
| PyVarObject_HEAD_INIT(NULL, 0) | |
| "MultibyteStreamWriter", /* tp_name */ | |
| sizeof(MultibyteStreamWriterObject), /* tp_basicsize */ | |
| 0, /* tp_itemsize */ | |
| /* methods */ | |
| (destructor)mbstreamwriter_dealloc, /* tp_dealloc */ | |
| 0, /* tp_print */ | |
| 0, /* tp_getattr */ | |
| 0, /* tp_setattr */ | |
| 0, /* tp_compare */ | |
| 0, /* tp_repr */ | |
| 0, /* tp_as_number */ | |
| 0, /* tp_as_sequence */ | |
| 0, /* tp_as_mapping */ | |
| 0, /* tp_hash */ | |
| 0, /* tp_call */ | |
| 0, /* tp_str */ | |
| PyObject_GenericGetAttr, /* tp_getattro */ | |
| 0, /* tp_setattro */ | |
| 0, /* tp_as_buffer */ | |
| Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | |
| | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
| 0, /* tp_doc */ | |
| (traverseproc)mbstreamwriter_traverse, /* tp_traverse */ | |
| 0, /* tp_clear */ | |
| 0, /* tp_richcompare */ | |
| 0, /* tp_weaklistoffset */ | |
| 0, /* tp_iter */ | |
| 0, /* tp_iterext */ | |
| mbstreamwriter_methods, /* tp_methods */ | |
| mbstreamwriter_members, /* tp_members */ | |
| codecctx_getsets, /* tp_getset */ | |
| 0, /* tp_base */ | |
| 0, /* tp_dict */ | |
| 0, /* tp_descr_get */ | |
| 0, /* tp_descr_set */ | |
| 0, /* tp_dictoffset */ | |
| mbstreamwriter_init, /* tp_init */ | |
| 0, /* tp_alloc */ | |
| mbstreamwriter_new, /* tp_new */ | |
| }; | |
| /** | |
| * Exposed factory function | |
| */ | |
| static PyObject * | |
| __create_codec(PyObject *ignore, PyObject *arg) | |
| { | |
| MultibyteCodecObject *self; | |
| MultibyteCodec *codec; | |
| if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) { | |
| PyErr_SetString(PyExc_ValueError, "argument type invalid"); | |
| return NULL; | |
| } | |
| codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME); | |
| if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0) | |
| return NULL; | |
| self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type); | |
| if (self == NULL) | |
| return NULL; | |
| self->codec = codec; | |
| return (PyObject *)self; | |
| } | |
| static struct PyMethodDef __methods[] = { | |
| {"__create_codec", (PyCFunction)__create_codec, METH_O}, | |
| {NULL, NULL}, | |
| }; | |
| PyMODINIT_FUNC | |
| init_multibytecodec(void) | |
| { | |
| int i; | |
| PyObject *m; | |
| PyTypeObject *typelist[] = { | |
| &MultibyteIncrementalEncoder_Type, | |
| &MultibyteIncrementalDecoder_Type, | |
| &MultibyteStreamReader_Type, | |
| &MultibyteStreamWriter_Type, | |
| NULL | |
| }; | |
| if (PyType_Ready(&MultibyteCodec_Type) < 0) | |
| return; | |
| m = Py_InitModule("_multibytecodec", __methods); | |
| if (m == NULL) | |
| return; | |
| for (i = 0; typelist[i] != NULL; i++) { | |
| if (PyType_Ready(typelist[i]) < 0) | |
| return; | |
| Py_INCREF(typelist[i]); | |
| PyModule_AddObject(m, typelist[i]->tp_name, | |
| (PyObject *)typelist[i]); | |
| } | |
| if (PyErr_Occurred()) | |
| Py_FatalError("can't initialize the _multibytecodec module"); | |
| } |