extmod/modubinascii: Rewrite mod_binascii_a2b_base64.

This implementation ignores invalid characters in the input. This allows
it to decode the output of b2a_base64, and also mimics the behavior of
CPython.
This commit is contained in:
Alex Robbins 2017-08-04 17:30:34 -05:00 committed by Paul Sokolovsky
parent 025e5f2b33
commit c89254fd0f
2 changed files with 59 additions and 42 deletions

View File

@ -105,54 +105,64 @@ mp_obj_t mod_binascii_unhexlify(mp_obj_t data) {
} }
MP_DEFINE_CONST_FUN_OBJ_1(mod_binascii_unhexlify_obj, mod_binascii_unhexlify); MP_DEFINE_CONST_FUN_OBJ_1(mod_binascii_unhexlify_obj, mod_binascii_unhexlify);
// If ch is a character in the base64 alphabet, and is not a pad character, then
// the corresponding integer between 0 and 63, inclusively, is returned.
// Otherwise, -1 is returned.
static int mod_binascii_sextet(byte ch) {
if (ch >= 'A' && ch <= 'Z') {
return ch - 'A';
} else if (ch >= 'a' && ch <= 'z') {
return ch - 'a' + 26;
} else if (ch >= '0' && ch <= '9') {
return ch - '0' + 52;
} else if (ch == '+') {
return 62;
} else if (ch == '/') {
return 63;
} else {
return -1;
}
}
mp_obj_t mod_binascii_a2b_base64(mp_obj_t data) { mp_obj_t mod_binascii_a2b_base64(mp_obj_t data) {
mp_buffer_info_t bufinfo; mp_buffer_info_t bufinfo;
mp_get_buffer_raise(data, &bufinfo, MP_BUFFER_READ); mp_get_buffer_raise(data, &bufinfo, MP_BUFFER_READ);
if (bufinfo.len % 4 != 0) { byte *in = bufinfo.buf;
vstr_t vstr;
vstr_init(&vstr, (bufinfo.len / 4) * 3 + 1); // Potentially over-allocate
byte *out = (byte *)vstr.buf;
uint shift = 0;
int nbits = 0; // Number of meaningful bits in shift
bool hadpad = false; // Had a pad character since last valid character
for (size_t i = 0; i < bufinfo.len; i++) {
if (in[i] == '=') {
if ((nbits == 2) || ((nbits == 4) && hadpad)) {
nbits = 0;
break;
}
hadpad = true;
}
int sextet = mod_binascii_sextet(in[i]);
if (sextet == -1) {
continue;
}
hadpad = false;
shift = (shift << 6) | sextet;
nbits += 6;
if (nbits >= 8) {
nbits -= 8;
out[vstr.len++] = (shift >> nbits) & 0xFF;
}
}
if (nbits) {
mp_raise_ValueError("incorrect padding"); mp_raise_ValueError("incorrect padding");
} }
vstr_t vstr;
byte *in = bufinfo.buf;
if (bufinfo.len == 0) {
vstr_init_len(&vstr, 0);
}
else {
vstr_init_len(&vstr, ((bufinfo.len / 4) * 3) - ((in[bufinfo.len-1] == '=') ? ((in[bufinfo.len-2] == '=') ? 2 : 1 ) : 0));
}
byte *out = (byte*)vstr.buf;
for (mp_uint_t i = bufinfo.len; i; i -= 4) {
char hold[4];
for (int j = 4; j--;) {
if (in[j] >= 'A' && in[j] <= 'Z') {
hold[j] = in[j] - 'A';
} else if (in[j] >= 'a' && in[j] <= 'z') {
hold[j] = in[j] - 'a' + 26;
} else if (in[j] >= '0' && in[j] <= '9') {
hold[j] = in[j] - '0' + 52;
} else if (in[j] == '+') {
hold[j] = 62;
} else if (in[j] == '/') {
hold[j] = 63;
} else if (in[j] == '=') {
if (j < 2 || i > 4) {
mp_raise_ValueError("incorrect padding");
}
hold[j] = 64;
} else {
mp_raise_ValueError("invalid character");
}
}
in += 4;
*out++ = (hold[0]) << 2 | (hold[1]) >> 4;
if (hold[2] != 64) {
*out++ = (hold[1] & 0x0F) << 4 | hold[2] >> 2;
if (hold[3] != 64) {
*out++ = (hold[2] & 0x03) << 6 | hold[3];
}
}
}
return mp_obj_new_str_from_vstr(&mp_type_bytes, &vstr); return mp_obj_new_str_from_vstr(&mp_type_bytes, &vstr);
} }
MP_DEFINE_CONST_FUN_OBJ_1(mod_binascii_a2b_base64_obj, mod_binascii_a2b_base64); MP_DEFINE_CONST_FUN_OBJ_1(mod_binascii_a2b_base64_obj, mod_binascii_a2b_base64);

View File

@ -21,6 +21,13 @@ print(binascii.a2b_base64(b'f4D/'))
print(binascii.a2b_base64(b'f4D+')) # convert '+' print(binascii.a2b_base64(b'f4D+')) # convert '+'
print(binascii.a2b_base64(b'MTIzNEFCQ0RhYmNk')) print(binascii.a2b_base64(b'MTIzNEFCQ0RhYmNk'))
# Ignore invalid characters and pad sequences
print(binascii.a2b_base64(b'Zm9v\n'))
print(binascii.a2b_base64(b'Zm\x009v\n'))
print(binascii.a2b_base64(b'Zm9v=='))
print(binascii.a2b_base64(b'Zm9v==='))
print(binascii.a2b_base64(b'Zm9v===YmFy'))
try: try:
print(binascii.a2b_base64(b'abc')) print(binascii.a2b_base64(b'abc'))
except ValueError: except ValueError: