Merge pull request #1038 from OwenSanzas/fix-xmltokener-unescapeentity

Fix input validation in XMLTokener.unescapeEntity()
This commit is contained in:
Sean Leary
2026-01-30 08:13:34 -06:00
committed by GitHub
3 changed files with 165 additions and 12 deletions

3
.gitignore vendored
View File

@@ -16,3 +16,6 @@ build
/gradlew
/gradlew.bat
.gitmodules
# ignore compiled class files
*.class

View File

@@ -151,33 +151,108 @@ public class XMLTokener extends JSONTokener {
/**
* Unescape an XML entity encoding;
* @param e entity (only the actual entity value, not the preceding & or ending ;
* @return
* @return the unescaped entity string
* @throws JSONException if the entity is malformed
*/
static String unescapeEntity(String e) {
static String unescapeEntity(String e) throws JSONException {
// validate
if (e == null || e.isEmpty()) {
return "";
}
// if our entity is an encoded unicode point, parse it.
if (e.charAt(0) == '#') {
int cp;
if (e.charAt(1) == 'x' || e.charAt(1) == 'X') {
// hex encoded unicode
cp = Integer.parseInt(e.substring(2), 16);
} else {
// decimal encoded unicode
cp = Integer.parseInt(e.substring(1));
if (e.length() < 2) {
throw new JSONException("Invalid numeric character reference: &#;");
}
return new String(new int[] {cp},0,1);
}
int cp = (e.charAt(1) == 'x' || e.charAt(1) == 'X')
? parseHexEntity(e)
: parseDecimalEntity(e);
return new String(new int[] {cp}, 0, 1);
}
Character knownEntity = entity.get(e);
if(knownEntity==null) {
if (knownEntity == null) {
// we don't know the entity so keep it encoded
return '&' + e + ';';
}
return knownEntity.toString();
}
/**
 * Parse a hexadecimal numeric character reference (e.g., "&#xABC;").
 * <p>
 * The digits must be plain ASCII hex digits and the resulting value must be a
 * valid Unicode code point, so that the caller can safely build a String from it.
 *
 * @param e entity string starting with '#' followed by 'x' or 'X' (e.g., "#x1F4A9")
 * @return the Unicode code point
 * @throws JSONException if the hex digits are missing or invalid, or if the
 *         value does not fit in an int or is not a valid Unicode code point
 */
private static int parseHexEntity(String e) throws JSONException {
    // hex encoded unicode - need at least one hex digit after #x
    if (e.length() < 3) {
        throw new JSONException("Invalid hex character reference: missing hex digits in &#" + e.substring(1) + ";");
    }
    String hex = e.substring(2);
    if (!isValidHex(hex)) {
        throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";");
    }
    int cp;
    try {
        // digits are already validated; this can still fail on int overflow
        cp = Integer.parseInt(hex, 16);
    } catch (NumberFormatException nfe) {
        throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";", nfe);
    }
    // values above U+10FFFF parse fine as ints but are not code points; building a
    // String from them would throw IllegalArgumentException instead of JSONException
    if (!Character.isValidCodePoint(cp)) {
        throw new JSONException("Invalid hex character reference: code point out of range in &#" + e.substring(1) + ";");
    }
    return cp;
}
/**
 * Parse a decimal numeric character reference (e.g., "&#123;").
 * <p>
 * The digits must be plain ASCII decimal digits and the resulting value must be
 * a valid Unicode code point, so that the caller can safely build a String from it.
 *
 * @param e entity string starting with '#' (e.g., "#123")
 * @return the Unicode code point
 * @throws JSONException if the digits are missing or invalid, or if the value
 *         does not fit in an int or is not a valid Unicode code point
 */
private static int parseDecimalEntity(String e) throws JSONException {
    String decimal = e.substring(1);
    if (!isValidDecimal(decimal)) {
        throw new JSONException("Invalid decimal character reference: &#" + decimal + ";");
    }
    int cp;
    try {
        // digits are already validated; this can still fail on int overflow
        cp = Integer.parseInt(decimal);
    } catch (NumberFormatException nfe) {
        throw new JSONException("Invalid decimal character reference: &#" + decimal + ";", nfe);
    }
    // values above 1114111 (U+10FFFF) parse fine as ints but are not code points; building
    // a String from them would throw IllegalArgumentException instead of JSONException
    if (!Character.isValidCodePoint(cp)) {
        throw new JSONException("Invalid decimal character reference: code point out of range in &#" + decimal + ";");
    }
    return cp;
}
/**
 * Tell whether a string consists solely of ASCII hexadecimal digits.
 * @param s the candidate string; may be null
 * @return true only when s is non-null, non-empty, and every character is
 *         one of 0-9, a-f, or A-F
 */
private static boolean isValidHex(String s) {
    if (s == null || s.isEmpty()) {
        return false;
    }
    for (char ch : s.toCharArray()) {
        boolean digit = ch >= '0' && ch <= '9';
        boolean lower = ch >= 'a' && ch <= 'f';
        boolean upper = ch >= 'A' && ch <= 'F';
        // reject on the first character outside all three ranges
        if (!digit && !lower && !upper) {
            return false;
        }
    }
    return true;
}
/**
 * Tell whether a string consists solely of ASCII decimal digits.
 * @param s the candidate string; may be null
 * @return true only when s is non-null, non-empty, and every character is
 *         in the range 0-9
 */
private static boolean isValidDecimal(String s) {
    if (s == null || s.isEmpty()) {
        return false;
    }
    for (char ch : s.toCharArray()) {
        boolean asciiDigit = ch >= '0' && ch <= '9';
        if (!asciiDigit) {
            return false;
        }
    }
    return true;
}
/**
* <pre>{@code

View File

@@ -1426,6 +1426,81 @@ public class XMLTest {
assertEquals(jsonObject3.getJSONObject("color").getString("value"), "008E97");
}
/**
 * An empty numeric character reference (&#;) must be reported as a
 * JSONException. It previously surfaced as a
 * StringIndexOutOfBoundsException.
 * Related to issue #1035
 */
@Test(expected = JSONException.class)
public void testEmptyNumericEntityThrowsJSONException() {
    XML.toJSONObject("<a>&#;</a>");
}
/**
 * A decimal character reference with non-digit content (&#txx;) must be
 * reported as a JSONException. It previously surfaced as a
 * NumberFormatException.
 * Related to issue #1036
 */
@Test(expected = JSONException.class)
public void testInvalidDecimalEntityThrowsJSONException() {
    XML.toJSONObject("<a>&#txx;</a>");
}
/**
 * A hex character reference with no digits after the x (&#x;) must be
 * reported as a JSONException.
 */
@Test(expected = JSONException.class)
public void testEmptyHexEntityThrowsJSONException() {
    XML.toJSONObject("<a>&#x;</a>");
}
/**
 * A hex character reference containing non-hex characters (&#xGGG;) must
 * be reported as a JSONException.
 */
@Test(expected = JSONException.class)
public void testInvalidHexEntityThrowsJSONException() {
    XML.toJSONObject("<a>&#xGGG;</a>");
}
/**
 * A well-formed decimal character reference (&#65;) must decode to the
 * character 'A'.
 */
@Test
public void testValidDecimalEntity() {
    JSONObject parsed = XML.toJSONObject("<a>&#65;</a>");
    String decoded = parsed.getString("a");
    assertEquals("A", decoded);
}
/**
 * A well-formed lowercase-x hex character reference (&#x41;) must decode
 * to the character 'A'.
 */
@Test
public void testValidHexEntity() {
    JSONObject parsed = XML.toJSONObject("<a>&#x41;</a>");
    String decoded = parsed.getString("a");
    assertEquals("A", decoded);
}
/**
 * A well-formed uppercase-X hex character reference (&#X41;) must decode
 * to the character 'A'.
 */
@Test
public void testValidUppercaseHexEntity() {
    JSONObject parsed = XML.toJSONObject("<a>&#X41;</a>");
    String decoded = parsed.getString("a");
    assertEquals("A", decoded);
}
}