Main.hs 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. {-# LANGUAGE ParallelListComp, ScopedTypeVariables #-}
  2. -- Something like Cocke-Younger-Kasami CFG parsing algorithm in Haskell
  3. -- * No Chomsky Normal Form required, but Epsilons are not allowed.
  4. -- * Not optimized.
  5. module Main where
  6. import Data.Either
  7. import Data.List
  8. import Data.Array.IArray as A
  9. import Control.Arrow (second)
  10. import Control.Monad.Except
  11. -- Partition a number n into exactly l parts of sizes \in \{1..f\}
  12. -- 1st arg: the number to be partitioned
  13. -- 2nd arg: = no. of elts in a partitioning
  14. -- 3rd arg: max size of a single element of a partitioning
  15. posPartitionsL :: (Integral a) => a -> a -> a -> [[a]]
  16. posPartitionsL 0 _ _ = []
  17. posPartitionsL _ 0 _ = [[]]
  18. posPartitionsL n 1 f | f < n = []
  19. posPartitionsL n 1 _ = [[n]]
  20. posPartitionsL n l f =
  21. concat [ map (k:) (posPartitionsL (n-k) (l-1) f) | k <- [1..min n f] ]
  22. listcorrespondences :: Eq a => [[a]] -> [[a]] -> [[a]]
  23. listcorrespondences as bs = zipWith (Data.List.intersect) as bs
  24. -- Epsilon excluded.
  25. data CFG a =
  26. CFG { numnt :: Integer,
  27. rules :: [(Integer, Either [Integer] [a])] }
  28. deriving (Show)
  29. cykparse :: (Eq a) => CFG a -> [a] -> Except [Char] [Integer]
  30. cykparse cfg w = do
  31. c <- cyk cfg w
  32. return $ fst $ (A.!) c (length w, 1)
  33. -- A list of unique nonterminals and a detailed resolution of the ambiguity
  34. type CykStruct = Array (Int, Int) ([Integer], [(Integer, [(Int, Int)])])
  35. cyk :: Eq a => CFG a -> [a] -> Except [Char] CykStruct
  36. cyk _ [] = throwError "CYK called on empty string"
  37. cyk cfg (h:t) =
  38. let
  39. crules = [(x',i) | (i,x) <- rules cfg, isRight x, let x' = fromRight [] x]
  40. n = length (h:t)
  41. arr = A.array ((1,1), (n,n)) entries
  42. entries = [ ((1,i), (suitablents, map (\x -> (x,[])) suitablents))
  43. | i <- [1..n] | c <- (h:t)
  44. , let suitablents = [j | (x',j) <- crules, c`elem`x' ] ]
  45. in
  46. return $ cykparse' cfg arr n 2
  47. -- now part of library
  48. -- fromRight :: b -> Either a b -> b
  49. -- fromRight _ (Right x) = x
  50. -- fromRight d (Left _) = d
  51. -- fromLeft :: a -> Either a b -> a
  52. -- fromLeft _ (Left x) = x
  53. -- fromLeft d (Right _) = d
  54. cykparse' :: CFG a -> CykStruct
  55. -> Int -> Int -> CykStruct
  56. cykparse' _ a n j | n < j = a
  57. cykparse' cfg a n j =
  58. let
  59. syntacticRules = [(x',i) | (i,x) <- rules cfg, isLeft x, let x' = fromLeft [] x]
  60. a' = a // [ ((j,i),
  61. (nub $ map fst contents, contents))
  62. | i <- [1..n-j+1]
  63. , let contents = [ (nt, origins)
  64. | (x',nt) <- syntacticRules
  65. , length x' <= j
  66. , partit <- posPartitionsL j (length x') (j-1)
  67. , let origins = map (second (+i)) (zip partit (scanl (+) 0 partit))
  68. , let ntsequence :: [[Integer]] = map (\(k,s') -> fst ((A.!) a (k,s'))) origins
  69. , let intersects :: [[Integer]] = (listcorrespondences (map return x') ntsequence)
  70. , any (const True) intersects && not (any null intersects) ]
  71. ]
  72. // [ ((j,i), ([],[])) | i <- [n-j+2..n] ]
  73. in
  74. cykparse' cfg a' n (j+1)
  75. -- Example: Dyck_2 grammar
  76. dyckab :: CFG Char
  77. dyckab = CFG 3 [(0, Left [0,0]), (0, Left [1,2]), (0, Left [1,0,2]), (1, Right "a"), (2, Right "b")]
  78. main :: IO ()
  79. main = do
  80. putStrLn "~ Ready ~"
  81. sequence
  82. [
  83. do
  84. s <- getLine
  85. let g = dyckab
  86. let result = runExcept (cykparse g s)
  87. case result of
  88. Right x -> putStrLn (show x)
  89. Left y -> putStrLn ("Error: " ++ y)
  90. | _ <- [1..]]
  91. return ()