metadata.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. /*
  2. Package metadata provides procedures to detect, extract and parse metadata
  3. headers from arbitrary byte buffers.
  4. Example document:
  5. date = "2024-03-02"
  6. site = "example.com"
  7. total = 76373
  8. The empty line above ends the metadata block.
  9. These last two lines are not metadata, they are the document's content.
  10. Unlike other front-matter processors, the procedures provided in this package
  11. do not use delimiters/fences to separate metadata from content.
  12. The metadata block starts on the first byte of the buffer, and extends up to
  13. the first occurrence of two consecutive newline characters (ignoring carriage
  14. return characters for compatibility).
  15. If the first byte of the buffer is not a valid character for a key in the
  16. language in which the metadata block is written, the entire buffer is treated
  17. as normal content and the metadata block is presumed to not exist/be empty.
  18. The metadata block may pass the above detection heuristic but fail to parse
  19. correctly. The programmer must then decide whether to treat this failure as an
  20. error, or to ignore it and treat the metadata block as absent (in other
  21. words, to treat the block as document content).
  22. The idiomatic way to prevent attempts to parse first line paragraphs as
  23. metadata is to start the buffer with an empty line or white space character.
  24. Empty, non-nil maps are returned when there is no metadata block to prevent
  25. nil dereference errors.
  26. Limitations: at present, only TOML is recognized as metadata.
  27. */
  28. package metadata
  29. import "cdop.pt/go/free/platepipe/metadata/toml"
  30. // IsPresent heuristically checks if metadata in present in the buffer.
  31. //
  32. // If the buffer seems to have metadata, IsPresent will return true and the
  33. // position of the first content byte in the buffer. This is useful for slicing
  34. // the buffer for further processing, without imposing any memory allocation
  35. // penalties.
  36. func IsPresent(buf []byte) (bool, int) {
  37. bufSz := len(buf)
  38. if bufSz == 0 {
  39. return false, 0
  40. }
  41. if !isStartOfKey(buf[0]) {
  42. return false, 0
  43. }
  44. last := buf[0]
  45. for i := 1; i < bufSz; i++ {
  46. if buf[i] == '\r' {
  47. continue
  48. }
  49. if buf[i] == '\n' && last == '\n' {
  50. return true, i + 1
  51. }
  52. last = buf[i]
  53. }
  54. return false, 0
  55. }
  56. func isStartOfKey(b byte) bool {
  57. return b == '\'' || b == '"' ||
  58. (b >= 'a' && b <= 'z') ||
  59. (b >= 'A' && b <= 'Z') ||
  60. (b >= '0' && b <= '9')
  61. }
  62. // FromTomlBuffer converts a TOML encoded buffer to a key/value map.
  63. //
  64. // FromTomlBuffer is meant to receive a slice of the original buffer, as in
  65. // the following example:
  66. //
  67. // present, pos := IsPresent(buf)
  68. // if present {
  69. // data, _ := FromTomlBuffer(buf[:pos])
  70. // ProcessContent(buf[pos:])
  71. // }
  72. func FromTomlBuffer(buf []byte) (map[string]any, error) {
  73. ret := map[string]any{}
  74. err := toml.Parse(buf, &ret)
  75. if err != nil {
  76. return map[string]any{}, err
  77. }
  78. return ret, nil
  79. }