大文字・小文字なども含めて元のHTMLの形をそのまま保持したい場合は、以下のようにNekoHTMLのfeature/propertyをいくつか設定する必要がある。
html2dom.java
// Use the Xerces DOMParser, driven by NekoHTML's HTMLConfiguration.
DOMParser parser = new DOMParser(new HTMLConfiguration());

// Name handling: "match" for element names and "no-change" for attribute
// names, so the case used in the input markup is not rewritten.
parser.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
parser.setProperty("http://cyberneko.org/html/properties/names/elems", "match");

// NOTE(review): the input below is a character stream (StringReader), so this
// default encoding presumably has no effect here -- confirm before relying on it.
parser.setProperty("http://cyberneko.org/html/properties/default-encoding", "UTF-8");

// Tag balancing is turned off; the two balance-tags sub-features are still
// set explicitly, exactly as before.
parser.setFeature("http://cyberneko.org/html/features/balance-tags", false);
parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
parser.setFeature("http://cyberneko.org/html/features/balance-tags/ignore-outside-content", true);

// Ask the scanner to notify about built-in entity references.
parser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);

// Parse the in-memory markup held in `layout` and pick up the resulting DOM tree.
InputSource layoutSource = new InputSource(new StringReader(layout));
parser.parse(layoutSource);
Document doc = parser.getDocument();