123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 |
- {-# LANGUAGE OverloadedStrings #-}
- {-# LANGUAGE CPP #-}
- {- |
- This module uses HXT to transverse an HTML document using CSS selectors.
- The most important function here is 'findBySelector', it takes a CSS query and
- a string containing the HTML to look into,
- and it returns a list of the HTML fragments that matched the given query.
- Only a subset of the CSS spec is currently supported:
- * By tag name: /table td a/
-
- * By class names: /.container .content/
- * By Id: /#oneId/
- * By attribute: /[hasIt]/, /[exact=match]/, /[contains*=text]/, /[starts^=with]/, /[ends$=with]/
-
- * Union: /a, span, p/
-
- * Immediate children: /div > p/
- * Get jiggy with it: /div[data-attr=yeah] > .mon, .foo.bar div, #oneThing/
- -}
- module Yesod.Test.TransversingCSS (
- findBySelector,
- HtmlLBS,
- Query,
- -- * For HXT hackers
- -- | These functions expose some low level details that you can blissfully ignore.
- parseQuery,
- runQuery,
- Selector(..),
- SelectorGroup(..)
- )
- where
- import Yesod.Test.CssQuery
- import qualified Data.Text as T
- import Control.Applicative ((<$>), (<*>))
- import Text.XML
- import Text.XML.Cursor
- import qualified Data.ByteString.Lazy as L
- import qualified Text.HTML.DOM as HD
- import Text.Blaze.Html (toHtml)
- import Text.Blaze.Html.Renderer.String (renderHtml)
- type Query = T.Text
- type HtmlLBS = L.ByteString
- -- | Perform a css 'Query' on 'Html'. Returns Either
- --
- -- * Left: Query parse error.
- --
- -- * Right: List of matching Html fragments.
- findBySelector :: HtmlLBS -> Query -> Either String [String]
- findBySelector html query = (\x -> map (renderHtml . toHtml . node) . runQuery x)
- <$> (Right $ fromDocument $ HD.parseLBS html)
- <*> parseQuery query
- -- Run a compiled query on Html, returning a list of matching Html fragments.
- runQuery :: Cursor -> [[SelectorGroup]] -> [Cursor]
- runQuery html query = concatMap (runGroup html) query
- runGroup :: Cursor -> [SelectorGroup] -> [Cursor]
- runGroup c [] = [c]
- runGroup c (DirectChildren s:gs) = concatMap (flip runGroup gs) $ c $/ selectors s
- runGroup c (DeepChildren s:gs) = concatMap (flip runGroup gs) $ c $// selectors s
- selectors :: [Selector] -> Cursor -> [Cursor]
- selectors ss c
- | all (selector c) ss = [c]
- | otherwise = []
- selector :: Cursor -> Selector -> Bool
- selector c (ById x) = not $ null $ attributeIs "id" x c
- selector c (ByClass x) =
- case attribute "class" c of
- t:_ -> x `elem` T.words t
- [] -> False
- selector c (ByTagName t) = not $ null $ element (Name t Nothing Nothing) c
- selector c (ByAttrExists t) = not $ null $ hasAttribute (Name t Nothing Nothing) c
- selector c (ByAttrEquals t v) = not $ null $ attributeIs (Name t Nothing Nothing) v c
- selector c (ByAttrContains n v) =
- case attribute (Name n Nothing Nothing) c of
- t:_ -> v `T.isInfixOf` t
- [] -> False
- selector c (ByAttrStarts n v) =
- case attribute (Name n Nothing Nothing) c of
- t:_ -> v `T.isPrefixOf` t
- [] -> False
- selector c (ByAttrEnds n v) =
- case attribute (Name n Nothing Nothing) c of
- t:_ -> v `T.isSuffixOf` t
- [] -> False
|