simple-mispelling-problem.html 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. <!DOCTYPE html><head><meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no" /><meta name="keywords" content="GNU, Emacs, Libre Software, Hurd, Guile, Guix" /><meta name="description" content="GNUcode.me is a website focusing on libre software projects, especially the GNU project." /><link type="application/atom+xml" rel="alternate" title="GNUcode.me -- Feed" href="/feed.xml" /><a rel="me" href="https://fosstodon.org/@thegnuguy"></a><link type="text/css" href="css/footer.min.css" rel="stylesheet"></link><link type="text/css" href="css/header.min.css" rel="stylesheet"></link><link type="text/css" href="css/main.min.css" rel="stylesheet"></link><title>Simple Mispelling Problem — GNUcode.me</title></head><body><header><nav><ul><li><a href="index.html">GNUcode.me</a></li><li><a href="services.html">Services</a></li><li><a href="about.html">About</a></li><li><a href="business-ideas.html">Business-ideas</a></li></ul></nav></header><h1>Simple Mispelling Problem</h1><main><section class="basic-section-padding"><article><h3>by Joshua Branson — November 22, 2022</h3><div><p>Edit: Yes I am aware that I misspelled &quot;mispelling&quot;. I figure it's funny if I
  2. leave it as it is. :)</p><p>I had this simple coding problem that I wanted to solve. Here's the problem:</p><p>Suppose you are writing an guix service <a href="https://notabug.org/jbranso/guix/src/newOpensmtpdBranch/gnu/services/mail.scm">(like I happen to be)</a>, and you are
  3. sanitizing user input for various records. Suppose your user mispells an
  4. option. Wouldn't it be nice to include a nice helpful hint on what he probably
  5. did wrong?</p><pre><code class="language-scheme">(opensmtpd-option (option &quot;forany&quot;))</code></pre><p>error: (option &quot;forany&quot;) is invalid.
  6. hint: Try &quot;for rcpt-to&quot;, &quot;for domain&quot;, &quot;for local&quot;, &quot;for any&quot;, or &quot;for&quot;.</p><p>Using <code>string-prefix-length-ci</code>, I was able to construct a fairly naive
  7. prococedure that tries to guess what the user meant to type. Here's what I came
  8. up with:</p><pre><code class="language-scheme">;; if strings is (list &quot;auth&quot; &quot;for any&quot; &quot;from local&quot;)
  9. ;; Then this will return &quot;Try \&quot;auth\&quot;, \&quot;for any\&quot;, or \&quot;from local\&quot;.&quot;
  10. (define (try-string strings)
  11. (string-append &quot;Try &quot;
  12. (let loop ((strings strings))
  13. (cond ((= 1 (length strings))
  14. (string-append
  15. &quot;or \&quot;&quot; (car strings) &quot;\&quot;.\n&quot;))
  16. (else
  17. (string-append
  18. &quot;\&quot;&quot; (car strings) &quot;\&quot;, &quot;
  19. (loop (cdr strings))))))))
  20. ;; suppose string is &quot;for anys&quot;
  21. ;; and strings is (list &quot;for any&quot; &quot;for local&quot; &quot;for domain&quot;)
  22. ;; then hint-string will return &quot;Did you mean &quot;for any&quot;?&quot;
  23. (define* (hint-string string strings
  24. #:key (fieldname #f))
  25. (if (not (string? string))
  26. (try-string strings)
  27. (let loop ((current-max 1)
  28. (loop-strings strings)
  29. (hint-strings '()))
  30. (if (null? loop-strings)
  31. (cond ((= 1 (length hint-strings)) ;; only one worthwhile match
  32. (if fieldname
  33. (string-append &quot;Did you mean (&quot; fieldname &quot; \&quot;&quot;
  34. (car hint-strings) &quot;\&quot;) ?\n&quot;)
  35. (string-append &quot;Did you mean \&quot;&quot; (car hint-strings)
  36. &quot;\&quot;?\n&quot;)))
  37. (else (if (null? hint-strings)
  38. (try-string strings)
  39. (try-string hint-strings))))
  40. (let* ((element-string (car loop-strings))
  41. (element-max
  42. (string-prefix-length-ci element-string string)))
  43. (cond ((&gt; element-max current-max)
  44. (loop element-max (cdr loop-strings)
  45. (list element-string)))
  46. ((= element-max current-max)
  47. (loop current-max (cdr loop-strings)
  48. (cons element-string hint-strings)))
  49. (else (loop current-max
  50. (cdr loop-strings) hint-strings))))))))</code></pre><p>It won't recognize that &quot;or any&quot; or &quot;bor any&quot; should match &quot;for any&quot;, but for
  51. most mispellings, it should be half decent, provided the user got the first
  52. character right.</p><p>What do you all think? How would you write such a procedure?</p><p>EDIT: Well it turns out that the guix developers actually have a
  53. (string-closest) procedure. The relevant code can be found in
  54. (guix utils) and (guix combinators):</p><pre><code class="language-scheme">(define fold2
  55. (case-lambda
  56. ((proc seed1 seed2 lst)
  57. &quot;Like `fold', but with a single list and two seeds.&quot;
  58. (let loop ((result1 seed1)
  59. (result2 seed2)
  60. (lst lst))
  61. (if (null? lst)
  62. (values result1 result2)
  63. (call-with-values
  64. (lambda () (proc (car lst) result1 result2))
  65. (lambda (result1 result2)
  66. (loop result1 result2 (cdr lst)))))))
  67. ((proc seed1 seed2 lst1 lst2)
  68. &quot;Like `fold', but with two lists and two seeds.&quot;
  69. (let loop ((result1 seed1)
  70. (result2 seed2)
  71. (lst1 lst1)
  72. (lst2 lst2))
  73. (if (or (null? lst1) (null? lst2))
  74. (values result1 result2)
  75. (call-with-values
  76. (lambda () (proc (car lst1) (car lst2) result1 result2))
  77. (lambda (result1 result2)
  78. (loop result1 result2 (cdr lst1) (cdr lst2)))))))))
  79. (define (string-distance s1 s2)
  80. &quot;Compute the Levenshtein distance between two strings.&quot;
  81. ;; Naive implemenation
  82. (define loop
  83. (mlambda (as bt)
  84. (match as
  85. (() (length bt))
  86. ((a s ...)
  87. (match bt
  88. (() (length as))
  89. ((b t ...)
  90. (if (char=? a b)
  91. (loop s t)
  92. (1+ (min
  93. (loop as t)
  94. (loop s bt)
  95. (loop s t))))))))))
  96. (let ((c1 (string-&gt;list s1))
  97. (c2 (string-&gt;list s2)))
  98. (loop c1 c2)))
  99. (define* (string-closest trial tests #:key (threshold 3))
  100. &quot;Return the string from TESTS that is the closest from the TRIAL,
  101. according to 'string-distance'. If the TESTS are too far from TRIAL,
  102. according to THRESHOLD, then #f is returned.&quot;
  103. (identity ;discard second return value
  104. (fold2 (lambda (test closest minimal)
  105. (let ((dist (string-distance trial test)))
  106. (if (and (&lt; dist minimal) (&lt; dist threshold))
  107. (values test dist)
  108. (values closest minimal))))
  109. #f +inf.0
  110. tests)))</code></pre><p>A lot of the above code is a little bit above my head, but it sure looks cool.</p><p>And it actually works better than mine.:</p><pre><code class="language-scheme">;; old scheme code
  111. (display (hint-string &quot;bor any&quot; (list &quot;for any&quot; &quot;auth&quot; &quot;rdns&quot;)))
  112. Try &quot;for any&quot;, &quot;auth&quot;, or &quot;rdns&quot;.
  113. ;; It didn't match any string. :(
  114. ;; Let's try guix's (string-closest _) ...
  115. (string-closest &quot;bor any&quot; (list &quot;for any&quot; &quot;auth&quot; &quot;rdns&quot;))
  116. $1 = &quot;for any&quot;</code></pre><p>Awesome!</p></div></article></section></main><footer><p>© 2020 Joshua Branson. The text on this site is free culture under the Creative Commons Attribution Share-Alike 4.0 International license.</p><p>This website is build with Haunt, a static site generator written in Guile Scheme. Source code is <a href="https://notabug.org/jbranso/gnucode.me">available.</a></p><p>The color theme of this website is based off of the famous <a href="#3f3f3f" target="_blank">zenburn</a> theme.</p></footer></body>