- 01
- 02
- 03
- 04
- 05
- 06
- 07
- 08
- 09
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
/**
 * Downloads http://govnokod.ru/comments, parses it as XML and prints every
 * {@code <li class="hentry">} element to standard output: a header line with the
 * author, the topic id and the language, then the comment text and an empty line.
 *
 * @param args command-line arguments; not used
 * @throws Exception if the parser cannot be created, the page cannot be fetched or
 *     parsed, or an XPath expression cannot be compiled or evaluated
 */
public static void main(String[] args) throws Exception {
    DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
    docBuilderFactory.setValidating(false);
    docBuilderFactory.setNamespaceAware(false);
    DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
    // Answer every external-entity request (the DOCTYPE's DTD in particular) with an
    // empty document so the parser does not try to download it.
    docBuilder.setEntityResolver(new EntityResolver() {
        @Override
        public InputSource resolveEntity(String publicId, String systemId)
                throws SAXException, IOException {
            return new InputSource(new StringReader(""));
        }
    });
    Document doc = docBuilder.parse("http://govnokod.ru/comments");

    XPath xpath = XPathFactory.newInstance().newXPath();
    NodeList nodes =
            (NodeList) xpath.evaluate("//li[@class='hentry']", doc, XPathConstants.NODESET);

    // The per-entry expressions never change, so compile them once instead of having
    // xpath.evaluate(String, Object) re-parse each string on every iteration.
    // XPathExpression is spelled fully qualified so no new import is required.
    javax.xml.xpath.XPathExpression authorExpr =
            xpath.compile(".//strong[@class='entry-author']/a/text()");
    javax.xml.xpath.XPathExpression languageExpr =
            xpath.compile(".//a[@rel='chapter']/text()");
    javax.xml.xpath.XPathExpression topicUriExpr =
            xpath.compile(".//a[@rel='bookmark'][@class='entry-title']/@href");
    javax.xml.xpath.XPathExpression textExpr =
            xpath.compile(".//div[@class='entry-comment']");

    // The topic id is the run of digits after the last '/' of the bookmark href.
    Pattern topicUriRegex = Pattern.compile("^.*/(\\d+)$");

    // NodeList is not Iterable, hence the index-based loop.
    for (int i = 0, n = nodes.getLength(); i < n; i++) {
        Node node = nodes.item(i);
        String author = authorExpr.evaluate(node);
        String language = languageExpr.evaluate(node);
        String topicUri = topicUriExpr.evaluate(node);
        Matcher m = topicUriRegex.matcher(topicUri);
        // Fall back to a placeholder when the href does not end in "/<digits>".
        String topicId = m.matches() ? m.group(1) : "неизвестный говнокод";
        String text = textExpr.evaluate(node);
        System.out.println("==== " + author + " наложил в " + topicId + " (" + language + ") ====");
        System.out.println(text);
        System.out.println("");
    }
}
bormand 24.08.2012 12:06 # 0
http://pastebin.com/r7PftDat
victor-homyakov 27.08.2012 19:52 # 0