TransversingCSS.hs 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. {-# LANGUAGE OverloadedStrings #-}
  2. {-# LANGUAGE CPP #-}
  3. {- |
  4. This module uses HXT to transverse an HTML document using CSS selectors.
  5. The most important function here is 'findBySelector', it takes a CSS query and
  6. a string containing the HTML to look into,
  7. and it returns a list of the HTML fragments that matched the given query.
  8. Only a subset of the CSS spec is currently supported:
  9. * By tag name: /table td a/
  10. * By class names: /.container .content/
  11. * By Id: /#oneId/
  12. * By attribute: /[hasIt]/, /[exact=match]/, /[contains*=text]/, /[starts^=with]/, /[ends$=with]/
  13. * Union: /a, span, p/
  14. * Immediate children: /div > p/
  15. * Get jiggy with it: /div[data-attr=yeah] > .mon, .foo.bar div, #oneThing/
  16. -}
  17. module Yesod.Test.TransversingCSS (
  18. findBySelector,
  19. HtmlLBS,
  20. Query,
  21. -- * For HXT hackers
  22. -- | These functions expose some low level details that you can blissfully ignore.
  23. parseQuery,
  24. runQuery,
  25. Selector(..),
  26. SelectorGroup(..)
  27. )
  28. where
  29. import Yesod.Test.CssQuery
  30. import qualified Data.Text as T
  31. import Control.Applicative ((<$>), (<*>))
  32. import Text.XML
  33. import Text.XML.Cursor
  34. import qualified Data.ByteString.Lazy as L
  35. import qualified Text.HTML.DOM as HD
  36. import Text.Blaze.Html (toHtml)
  37. import Text.Blaze.Html.Renderer.String (renderHtml)
  38. type Query = T.Text
  39. type HtmlLBS = L.ByteString
  40. -- | Perform a css 'Query' on 'Html'. Returns Either
  41. --
  42. -- * Left: Query parse error.
  43. --
  44. -- * Right: List of matching Html fragments.
  45. findBySelector :: HtmlLBS -> Query -> Either String [String]
  46. findBySelector html query = (\x -> map (renderHtml . toHtml . node) . runQuery x)
  47. <$> (Right $ fromDocument $ HD.parseLBS html)
  48. <*> parseQuery query
  49. -- Run a compiled query on Html, returning a list of matching Html fragments.
  50. runQuery :: Cursor -> [[SelectorGroup]] -> [Cursor]
  51. runQuery html query = concatMap (runGroup html) query
  52. runGroup :: Cursor -> [SelectorGroup] -> [Cursor]
  53. runGroup c [] = [c]
  54. runGroup c (DirectChildren s:gs) = concatMap (flip runGroup gs) $ c $/ selectors s
  55. runGroup c (DeepChildren s:gs) = concatMap (flip runGroup gs) $ c $// selectors s
  56. selectors :: [Selector] -> Cursor -> [Cursor]
  57. selectors ss c
  58. | all (selector c) ss = [c]
  59. | otherwise = []
  60. selector :: Cursor -> Selector -> Bool
  61. selector c (ById x) = not $ null $ attributeIs "id" x c
  62. selector c (ByClass x) =
  63. case attribute "class" c of
  64. t:_ -> x `elem` T.words t
  65. [] -> False
  66. selector c (ByTagName t) = not $ null $ element (Name t Nothing Nothing) c
  67. selector c (ByAttrExists t) = not $ null $ hasAttribute (Name t Nothing Nothing) c
  68. selector c (ByAttrEquals t v) = not $ null $ attributeIs (Name t Nothing Nothing) v c
  69. selector c (ByAttrContains n v) =
  70. case attribute (Name n Nothing Nothing) c of
  71. t:_ -> v `T.isInfixOf` t
  72. [] -> False
  73. selector c (ByAttrStarts n v) =
  74. case attribute (Name n Nothing Nothing) c of
  75. t:_ -> v `T.isPrefixOf` t
  76. [] -> False
  77. selector c (ByAttrEnds n v) =
  78. case attribute (Name n Nothing Nothing) c of
  79. t:_ -> v `T.isSuffixOf` t
  80. [] -> False