// tokenset.h
  1. #if !defined tokenset_h
  2. #define tokenset_h
  3. /**
  4. * This file is part of uhferret.
  5. *
  6. * Author:: Peter Lane
  7. * Copyright:: Copyright 2011, Peter Lane.
  8. * License:: GPLv3
  9. *
  10. * uhferret is free software: you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation, either version 3 of the License, or
  13. * (at your option) any later version.
  14. *
  15. * uhferret is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public License
  21. * along with uhferret. If not, see <http://www.gnu.org/licenses/>.
  22. */
  23. #include <assert.h>
  24. #include <map>
  25. #include <string>
  26. #include <vector>
  /** A Token is a sequence of characters read in by a TokenReader
   * -- this class provides dynamic storage for the token, supporting
   *    the addition of characters
   * -- when finished, the token can be queried for its length and made
   *    into a string
   *
   * The class keeps a raw character buffer and declares its own destructor,
   * so a compiler-generated copy would leave two objects sharing one buffer
   * pointer.  Copying is therefore disabled (see the private section).
   * NOTE(review): the member definitions are not in this header; the buffer
   * is presumably allocated and released by Token itself -- confirm in the
   * implementation file.
   */
  class Token
  {
    public:
      Token ();
      ~Token ();

      /** Erase the token (presumably resets it to empty -- defined elsewhere). */
      void Erase ();

      /** Add the character c to the token. */
      void AddChar (char c);

      /** Return the token's characters as a std::string. */
      std::string GetString () const;

      /** Return the length of the token. */
      int GetLength () const;

    private:
      // Copying is not supported: both members are declared but intentionally
      // left undefined (pre-C++11 idiom), so an accidental copy fails at
      // compile time (outside the class) or link time (inside it) instead of
      // duplicating the raw buffer pointer.
      Token (const Token &);
      Token & operator= (const Token &);

      /** Internal helper; by its name it enlarges the storage (defined elsewhere). */
      void Grow ();

      char * _token;    // storage for the token
      int _capacity;    // size of the storage
      int _top;         // position of the end of the token within the storage
  };
  /** A TokenSet maps strings to token indices
   * -- this is for memory efficiency, ensuring every token's string is
   *    stored once within the application
   *
   * Two maps are kept: one from string to index and one from index back
   * to string.  The member functions are defined outside this header, so
   * the notes on them below describe the declared interface only.
   */
  class TokenSet
  {
    public:
      TokenSet ();

      /** Return the index associated with the given token string. */
      std::size_t GetIndexFor (std::string token);

      /** Return the string associated with the given token index. */
      std::string GetStringFor (std::size_t token);

      /** Clear the set (presumably empties both maps -- defined elsewhere). */
      void Clear ();

      /** Set the next index to hand out.
       * NOTE(review): takes an int although indices are stored as
       * std::size_t; kept as declared so the existing definition and its
       * callers continue to link.
       */
      void SetNextIndex (int index);

      /** Associate the given string with the given index. */
      void SetIndexString (std::string token, int index);

    private:
      std::map<std::string, std::size_t> _tokens;                      // string -> index
      std::map<std::string, std::size_t>::const_iterator _tokens_it;   // iterator over _tokens
      std::size_t _nextindex;                                          // next free index for new string
      std::map<std::size_t, std::string> _strings;                     // index -> string
      std::map<std::size_t, std::string>::const_iterator _strings_it;  // iterator over _strings
  };
  67. #endif