Skip to content

Commit

Permalink
Further simplify the codebase. Reduce redundancy and unnecessary abst…
Browse files Browse the repository at this point in the history
…raction.
  • Loading branch information
ScalaWilliam committed Aug 25, 2016
1 parent e9974f0 commit c7bb453
Show file tree
Hide file tree
Showing 13 changed files with 157 additions and 153 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ lazy val core = project.settings(
"xmlunit" % "xmlunit" % "1.6",
"org.codehaus.woodstox" % "woodstox-core-asl" % "4.4.1",
"org.compass-project" % "compass" % "2.2.0",
"org.scalatest" %% "scalatest" % "2.2.6" % "test",
"org.scalatest" %% "scalatest" % "3.0.0" % "test",
"org.scala-lang.modules" %% "scala-xml" % "1.0.5"
),
name := "xs4s",
Expand Down
44 changes: 44 additions & 0 deletions core/src/main/scala/com/scalawilliam/xs4s/Implicits.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package com.scalawilliam.xs4s

import javax.xml.stream.XMLEventReader
import javax.xml.stream.events.XMLEvent

import scala.xml.Elem

/**
 * Extension methods for working with StAX event streams and `scala.xml.NodeSeq` values.
 *
 * Mix this trait in, or `import com.scalawilliam.xs4s.Implicits._`, to bring the
 * enrichments below into scope together with the generic iterator scanning
 * syntax inherited from `Scanner.Implicits`.
 */
trait Implicits extends Scanner.Implicits {

  /** Text-based comparison operators for a `scala.xml.NodeSeq`. */
  implicit class NodeSeqExtensions(nodeSeq: scala.xml.NodeSeq) {

    /** @return `true` when `nodeSeq.text` is equal to `hasValue` */
    def ===(hasValue: String): Boolean =
      nodeSeq.text == hasValue

    /** @return `true` when `nodeSeq.text` is different from `notValue` */
    def !==(notValue: String): Boolean =
      nodeSeq.text != notValue
  }

  /** Builds XML elements out of a plain iterator of StAX events. */
  implicit class RichXMLEventIterator(input: Iterator[XMLEvent]) {

    /** Every intermediate builder state produced while scanning the events, in order. */
    def buildXml: Iterator[XmlElementBuilder] =
      input.scan(XmlElementBuilder.Scanner)

    /**
     * Consumes the whole iterator and returns the last element that was collected,
     * or `None` if nothing was collected.
     *
     * Note this is unsafe. If the iterator is infinite then we'll never return.
     */
    def buildElement: Option[Elem] =
      // Fold rather than `toStream.lastOption`: a Stream memoises every element it
      // has produced, whereas the fold only ever holds on to the most recent one.
      input
        .scanCollect(XmlElementBuilder.Scanner)
        .foldLeft(Option.empty[Elem])((_, elem) => Some(elem))
  }

  /**
   * Exposes a StAX `XMLEventReader` as a Scala `Iterator[XMLEvent]`.
   *
   * The wrapper only delegates to the reader; it does not close it.
   */
  implicit class RichXMLEventReader(eventReader: XMLEventReader) extends scala.collection.Iterator[XMLEvent] {

    def hasNext: Boolean = eventReader.hasNext

    def next(): XMLEvent = eventReader.nextEvent()

    /**
     * Consumes the remaining events of the reader and returns the last collected element.
     * The same caveat as [[RichXMLEventIterator.buildElement]] applies.
     */
    def buildElement: Option[Elem] =
      // This wrapper already is the Iterator[XMLEvent] over the reader, so reuse it
      // instead of wrapping the reader a second time through an implicit conversion.
      new RichXMLEventIterator(this).buildElement
  }

}

/** Import target: `import com.scalawilliam.xs4s.Implicits._`. */
object Implicits extends Implicits
15 changes: 0 additions & 15 deletions core/src/main/scala/com/scalawilliam/xs4s/NodeSeqExtensions.scala

This file was deleted.

34 changes: 34 additions & 0 deletions core/src/main/scala/com/scalawilliam/xs4s/Scanner.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package com.scalawilliam.xs4s

/**
 * Describes a stateful scan over a sequence of inputs.
 *
 * @tparam In    type of the elements being consumed
 * @tparam State type of the state carried from one element to the next
 * @tparam Out   type of the values extracted from the states
 */
trait Scanner[In, State, Out] {

  /** The state the scan starts from, before any element has been consumed. */
  def initial: State

  /** Produces the next state from the current `state` and the next input `element`. */
  def scan(state: State, element: In): State

  /** Extracts an output from the states it is defined for. */
  def collect: PartialFunction[State, Out]
}

object Scanner {

  /** Syntax for running a [[Scanner]] over any `Iterator`. */
  trait Implicits {

    implicit class RichIterator[T](iterator: Iterator[T]) {

      /**
       * Every state the scanner goes through, beginning with `scanner.initial`
       * and followed by one state per element of the iterator.
       */
      def scan[S, O](scanner: Scanner[T, S, O]): Iterator[S] =
        iterator.scanLeft(scanner.initial) { (state, element) =>
          scanner.scan(state, element)
        }

      /** The outputs of `scanner.collect` for those states on which it is defined. */
      def scanCollect[S, O](scanner: Scanner[T, S, O]): Iterator[O] = {
        val states = scan(scanner)
        states.collect(scanner.collect)
      }
    }

  }

  /** Import target: `import Scanner.Implicits._`. */
  object Implicits extends Implicits

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ object XmlElementBuilder {

def initial: XmlElementBuilder = NoElement

object Scan {
object Scanner extends Scanner[XMLEvent, XmlElementBuilder, Elem] {
def initial: XmlElementBuilder = NoElement

def scan(xmlElementBuilder: XmlElementBuilder, xMLEvent: XMLEvent): XmlElementBuilder =
Expand Down
44 changes: 12 additions & 32 deletions core/src/main/scala/com/scalawilliam/xs4s/XmlElementExtractor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,15 @@ import javax.xml.stream.XMLInputFactory
import javax.xml.stream.events.{EndElement, StartElement, XMLEvent}

import com.scalawilliam.xs4s.XmlElementBuilder.{FinalElement, NoElement}
import com.scalawilliam.xs4s.XmlElementExtractor.CollectorDefinition

import scala.xml.Elem

object XmlElementExtractor {

object IteratorCreator {

implicit class addIteratorCreateorToBasicElementExtractorBuilder[T](beeb: XmlElementExtractor[T]) {
def processInputStream(inputStream: InputStream)(implicit xMLInputFactory: XMLInputFactory): Iterator[T] = {
val reader = xMLInputFactory.createXMLEventReader(inputStream)
import XmlEventIterator._
reader.scanLeft(beeb.EventProcessor.Scan.initial)(beeb.EventProcessor.Scan.scan).collect(beeb.EventProcessor.Scan.collect)
}
def collectElements[T](p: List[String] => Boolean) = {
XmlElementExtractor {
case l if p(l) => identity
}

}

/**
* Collector Definition: if the 'xpath' of the current position is equal to the
* List[String] part of this parameter, then we begin capturing
* its element. Once that element is captured, we call _2(element)
* and change state to Captured().
*/
type CollectorDefinition[T] = PartialFunction[List[String], Elem => T]

def collectElements[T](pf: CollectorDefinition[T]) = {
XmlElementExtractor(pf)
}

}
Expand All @@ -42,26 +23,25 @@ object XmlElementExtractor {
*
* @tparam T Return type of these capture converters
*/
case class XmlElementExtractor[T](pf: CollectorDefinition[T]) {
case class XmlElementExtractor[T](pf: PartialFunction[List[String], Elem => T]) {

def initial: EventProcessor = EventProcessor.initial

sealed trait EventProcessor {
def process: PartialFunction[XMLEvent, EventProcessor]
}

object EventProcessor {
ep =>

object Scan {
def initial = ep.initial
object Scan extends Scanner[XMLEvent, EventProcessor, T] {
def initial = EventProcessor.initial

def scan(eventProcessor: EventProcessor, xMLEvent: XMLEvent) = eventProcessor.process(xMLEvent)
def scan(eventProcessor: EventProcessor, xMLEvent: XMLEvent) = eventProcessor.process(xMLEvent)

def collect: PartialFunction[EventProcessor, T] = {
case Captured(_, e) => e
}
def collect: PartialFunction[EventProcessor, T] = {
case EventProcessor.Captured(_, e) => e
}
}

object EventProcessor {

def initial: EventProcessor = ProcessingStack()

Expand Down
16 changes: 0 additions & 16 deletions core/src/main/scala/com/scalawilliam/xs4s/XmlEventIterator.scala

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import java.io.ByteArrayInputStream
import javax.xml.stream.XMLInputFactory

import org.scalatest.{Inside, Matchers, WordSpec}

import Implicits._
import scala.xml.Elem

/**
Expand Down Expand Up @@ -33,12 +33,8 @@ class BasicElementExtractorBuilderSpec extends WordSpec with Matchers with Insid
val is = new ByteArrayInputStream(input.getBytes("UTF-8"))
try {
val streamer = inputFactory.createXMLEventReader(is)
try {
import XmlEventIterator._
val items = streamer.toIterator.scanLeft(instance.EventProcessor.Scan.initial)(_.process(_)).toList
val captures = items.collect(instance.EventProcessor.Scan.collect)
captures.toVector
} finally streamer.close()
try streamer.toIterator.scanCollect(instance.Scan).toVector
finally streamer.close()
} finally is.close()
}

Expand Down
15 changes: 10 additions & 5 deletions core/src/test/scala/com/scalawilliam/xs4s/ElementBuilderSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@ package com.scalawilliam.xs4s

import java.io.ByteArrayInputStream
import javax.xml.stream.XMLInputFactory
import org.scalatest.{Inside, Inspectors, Matchers, WordSpec}

import org.scalatest.Inside._
import org.scalatest.Matchers._
import org.scalatest.OptionValues._
import org.scalatest.WordSpec

import scala.xml._

class ElementBuilderSpec extends WordSpec with Matchers with Inspectors with Inside {
class ElementBuilderSpec extends WordSpec {

/**
* The purpose of ElementBuilder is to
Expand All @@ -28,8 +33,8 @@ class ElementBuilderSpec extends WordSpec with Matchers with Inspectors with Ins
val is = new ByteArrayInputStream(input.getBytes("UTF-8"))
val inputFactory = XMLInputFactory.newInstance()
val streamer = inputFactory.createXMLEventReader(is)
import com.scalawilliam.xs4s.elementbuilder.eventReaderExtractors
val tree = streamer.blockingElement.next()
import com.scalawilliam.xs4s.Implicits._
val tree = streamer.buildElement.value

inside(tree) {
case Elem(prefix, label, attributes, scope, child@_*) =>
Expand Down Expand Up @@ -85,4 +90,4 @@ class ElementBuilderSpec extends WordSpec with Matchers with Inspectors with Ins

}

}
}
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
package com.scalawilliam.xs4s.examples

import java.io.{File, FileInputStream}
import java.io.FileReader
import javax.xml.stream.XMLInputFactory

import com.scalawilliam.xs4s.Implicits._
import com.scalawilliam.xs4s.XmlElementExtractor

object ComputeBritainsRegionalMinimumParkingCosts extends App {

implicit val xmlInputfactory = XMLInputFactory.newInstance()
val xmlInputfactory = XMLInputFactory.newInstance()

// http://data.gov.uk/dataset/car-parks
val splitter = XmlElementExtractor.collectElements { case l if l.last == "CarPark" => identity }

val regionMinCosts = for {
i <- (1 to 8).par
file = new File(s"downloads/carparks-data/CarParkData_$i.xml")
carPark <- {
import XmlElementExtractor.IteratorCreator._
splitter.processInputStream(new FileInputStream(file))
}
regionName <- carPark \\ "RegionName" map (_.text)
minCost <- (carPark \\ "MinCostPence") map (_.text.toInt)
if minCost > 0
} yield regionName -> minCost
val splitter = XmlElementExtractor.collectElements(_.last == "CarPark")

val regionMinCosts = (1 to 8).par.flatMap { i =>
val fileReader = new FileReader(s"downloads/carparks-data/CarParkData_$i.xml")
val reader = xmlInputfactory.createXMLEventReader(fileReader)
try {
(for {
carPark <- reader.toIterator.scanCollect(splitter.Scan)
regionName <- carPark \\ "RegionName" map (_.text)
minCost <- (carPark \\ "MinCostPence") map (_.text.toInt)
if minCost > 0
} yield regionName -> minCost).toList
} finally reader.close()
}

val regionMinimumParkingCosts = regionMinCosts.toList
.groupBy { case (region, cost) => region }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,23 @@ import java.net.URL
import javax.xml.stream.XMLInputFactory

import com.scalawilliam.xs4s.XmlElementExtractor
import com.scalawilliam.xs4s.Implicits._

object FindMostPopularWikipediaKeywords extends App {

implicit val xmlInputfactory = XMLInputFactory.newInstance()
val xmlInputFactory = XMLInputFactory.newInstance()
// Wikipedia abstracts - 4GB
val url = new URL("https://dumps.wikimedia.org/enwiki/20140903/enwiki-20140903-abstract.xml")
val inputStream = url.openStream()
val xmlEventReader = xmlInputFactory.createXMLEventReader(inputStream)

// builder that extracts all the anchors
val anchorSplitter = XmlElementExtractor.collectElements { case l if l.last == "anchor" => identity }
val anchorSplitter = XmlElementExtractor.collectElements(_.last == "anchor")

val anchors = {
import XmlElementExtractor.IteratorCreator._
val anchorsStream = anchorSplitter.processInputStream(url.openStream())
val anchorsStream = xmlEventReader
.toIterator
.scanCollect(anchorSplitter.Scan)
// add 'full' as an argument to go through the whole stream
if (args contains "full") {
anchorsStream
Expand Down Expand Up @@ -48,5 +52,6 @@ object FindMostPopularWikipediaKeywords extends App {
result
}

xmlEventReader.close()

}
Loading

0 comments on commit c7bb453

Please sign in to comment.