Scala で XML を Parse する(添削希望)
Scala で XML を Parse してみたのですが、今ひとつカッコ悪いコードになってしまいました。
凄腕の Scala Hacker に添削して欲しい所です… orz
import scala.io.Source
import scala.xml.{Node, NodeSeq}
import scala.xml.parsing.XhtmlParser

/** Parses an XHTML file at a hard-coded path and prints the `title` attribute
  * of the `li` elements reached through a fixed chain of element/attribute
  * selectors (presumably the Scaladoc API index page -- confirm against the path).
  */
object Test {

  /** Entry point: parse the file, walk the selector chain, print every title.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val source = Source.fromFile("/opt/local/share/scala-2.8/doc/scala-devel-docs/api/index.html")
    // Close the file even when the parser throws, so the handle never leaks.
    val xhtml = try XhtmlParser(source) finally source.close()

    // Each step selects the direct children with the given label and keeps
    // only those accepted by the attribute predicate.
    val steps = List(
      "div" -> compareAttr("id", "tpl"),
      "ol" -> compareAttr("class", "packages"),
      "li" -> compareAttr("class", "pack"),
      "ol" -> (compareAttr("class", "templates") orElse compareAttr("class", "packages"))
    )

    // Starting point: every <div id="browser"> anywhere in the document.
    val start = (xhtml \\ "div").filter(compareAttr("id", "browser"))

    val lists = steps.foldLeft(start) { (nodes, selector) =>
      (nodes \ selector._1).filter(selector._2)
    }

    val titles = (lists \ "li").map(node => (node \ "@title").text)
    titles.foreach(println)
  }

  /** Predicate on a node: true when the text of attribute `attrPrefix` equals `text`.
    *
    * It is defined only at nodes for which it returns true, so
    * `a orElse b` tries `b` whenever `a` does not match, which makes
    * `orElse` act as a logical OR of two predicates.
    *
    * @param attrPrefix attribute name without the leading `@`
    * @param text       expected attribute value
    */
  class compareAttr(attrPrefix: String, val text: String) extends PartialFunction[Node, Boolean] {
    /** Attribute selector passed to `\`, i.e. the name prefixed with `@`. */
    val attr = "@" + attrPrefix

    override def apply(node: Node): Boolean = (node \ attr).text == text

    override def isDefinedAt(node: Node): Boolean = apply(node)
  }

  /** Factory so that predicates can be written as `compareAttr("id", "tpl")`. */
  object compareAttr {
    def apply(attr: String, test: String): compareAttr = new compareAttr(attr, test)
  }
}
Pair の List を foldLeft でまわす所は、素直に下記のように書いた方が良い気もするのですが、括弧の数が…。
// Same selector chain as the nested-parenthesis form, written as one named
// step per level; the block keeps the intermediate names out of scope.
val titles = {
  val browser = (xhtml \\ "div").filter(compareAttr("id", "browser"))
  val tpl = (browser \ "div").filter(compareAttr("id", "tpl"))
  val packages = (tpl \ "ol").filter(compareAttr("class", "packages"))
  val pack = (packages \ "li").filter(compareAttr("class", "pack"))
  // Accept lists whose class is either "templates" or "packages".
  val lists = (pack \ "ol").filter(
    compareAttr("class", "templates") orElse compareAttr("class", "packages")
  )
  (lists \ "li").map(node => (node \ "@title").text)
}
うう… XPath 使いたい。
PartialFunction を case を使って定義しなおしてみた。
import scala.io.Source
import scala.xml.{Node, NodeSeq}
import scala.xml.parsing.XhtmlParser

/** Parses an XHTML file at a hard-coded path and prints the `title` attribute
  * of the `li` elements reached through a fixed chain of element/attribute
  * selectors. The predicates here are partial functions built from `case`
  * clauses.
  */
object Test {

  /** Entry point: parse the file, walk the selector chain, print every title.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val source = Source.fromFile("/opt/local/share/scala-2.8/doc/scala-devel-docs/api/index.html")
    // Close the file even when the parser throws, so the handle never leaks.
    val xhtml = try XhtmlParser(source) finally source.close()

    // Each step selects the direct children with the given label and keeps
    // only those accepted by the attribute predicate.
    val steps = List(
      "div" -> compareAttr("id", "browser"),
      "div" -> compareAttr("id", "tpl"),
      "ol" -> compareAttr("class", "packages"),
      "li" -> compareAttr("class", "pack"),
      "ol" -> (compareAttr("class", "templates") orElse compareAttr("class", "packages"))
    )

    val lists = steps.foldLeft(xhtml \ "body") { (nodes, selector) =>
      // The predicates are undefined for non-matching nodes; falling back to
      // ignorePattern turns "undefined" into false instead of a MatchError.
      (nodes \ selector._1).filter(selector._2 orElse ignorePattern)
    }

    val titles = (lists \ "li").map(node => (node \ "@title").text)
    titles.foreach(println)
  }

  /** Builds a partial function that is defined (and returns true) only for
    * nodes whose attribute `attrPrefix` has exactly the value `text`.
    *
    * @param attrPrefix attribute name without the leading `@`
    * @param text       expected attribute value
    * @return a partial function undefined for every non-matching node
    */
  def compareAttr(attrPrefix: String, text: String): PartialFunction[Node, Boolean] = {
    val attr = "@" + attrPrefix
    // Bound to a name on purpose: a bare `{ case ... }` on the next line would
    // be parsed as an argument applied to the expression above.
    val matcher: PartialFunction[Node, Boolean] = {
      case node if (node \ attr).text == text => true
    }
    matcher
  }

  /** Fallback predicate: defined for every node and always false. */
  def ignorePattern: PartialFunction[Node, Boolean] = { case _ => false }
}