// mro/ShaarliGo
// mirror of https://code.mro.name/mro/ShaarliGo
//
// Copyright (C) 2018-2021 Marcus Rohrmoser, http://purl.mro.name/ShaarliGo
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//

package main

import (
	"encoding/xml"
	"net/http"
	"net/url"
	"path"
	"sort"
	"strconv"
	"strings"
	"time"

	"golang.org/x/text/language"
	"golang.org/x/text/search"
)

// rankEntryTerms scores how well entry matches the search terms. A result of 0
// means no term matched.
//
// For every term the score grows by
//   - 5 per category whose term contains the text after a leading "#"
//     (only for terms starting with "#"),
//   - 1 if the content body contains the term,
//   - 2 if the title body contains the term.
//
// Note that a "#tag" term is additionally looked up verbatim, including the "#",
// in content and title. All comparisons are delegated to matcher.IndexString.
// A nil entry ranks 0.
//
// better: https://stackoverflow.com/questions/24836044/case-insensitive-string-search-in-golang
func rankEntryTerms(entry *Entry, terms []string, matcher *search.Matcher) int {
	// defer un(trace("ranker"))
	if nil == entry {
		// Nothing to match against. Returning early also keeps the category
		// lookup below from dereferencing a nil entry.
		return 0
	}
	// index doubles as weight bonus: 0 = content, 1 = title.
	parts := [2]string{"", entry.Title.Body}
	if nil != entry.Content {
		parts[0] = entry.Content.Body
	}
	rank := 0
	for _, term := range terms {
		if strings.HasPrefix(term, "#") {
			t := term[1:]
			for _, cat := range entry.Categories {
				if idx, _ := matcher.IndexString(cat.Term, t); idx >= 0 {
					rank += 5
				}
			}
		}
		for weight, txt := range parts {
			if idx, _ := matcher.IndexString(txt, term); idx >= 0 {
				rank += 1 + weight
			}
		}
	}
	return rank
}

// handleSearch returns the handler that answers search requests with an Atom
// feed of the matching entries, rendered via the posts.xslt theme stylesheet.
//
// Only GET requests carrying at least one "q" parameter produce output. The
// optional "offset" parameter selects the first entry of the page; the page size
// is app.cfg.LinksPerPage (at least 1). An unconfigured app is sent to the config
// page, a query consisting of whitespace only is redirected to the posts listing.
func (app *Server) handleSearch() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		now := time.Now()

		// possibly check permission to search (non-logged-in visitor?)
		if !app.cfg.IsConfigured() {
			http.Redirect(w, r, cgiName+"/config", http.StatusPreconditionFailed)
			return
		}

		switch r.Method {
		case http.MethodGet:
			app.KeepAlive(w, r, now)
			// pull out parameters q, offset, limit
			query := r.URL.Query()
			if q := query["q"]; 0 < len(q) {
				terms := strings.Fields(strings.TrimSpace(strings.Join(q, " ")))
				if 0 == len(terms) {
					http.Redirect(w, r, path.Join("..", "..", uriPub, uriPosts)+"/", http.StatusFound)
					return
				}
				limit := max(1, app.cfg.LinksPerPage)
				offset := 0
				if o := query["offset"]; o != nil {
					offset, _ = strconv.Atoi(o[0]) // just ignore conversion errors. 0 is a fine fallback
				}
				// relative URL of this search without paging; base for all paging links.
				qu := cgiName + "/search/" + "?" + "q" + "=" + url.QueryEscape(strings.Join(terms, " "))

				catScheme := Iri(app.url.ResolveReference(mustParseURL(path.Join(uriPub, uriTags))).String() + "/")

				// NOTE(review): the second return value of LoadFeed is dropped here;
				// presumably a feed that failed to load simply yields no hits — confirm.
				feed, _ := LoadFeed()

				lang := language.Make("de") // todo: should come from the entry, feed, settings, default (in that order)
				matcher := search.New(lang, search.IgnoreDiacritics, search.IgnoreCase)
				ret := feed.Search(func(entry *Entry) int { return rankEntryTerms(entry, terms, matcher) })

				ret.XmlBase = Iri(app.url.String())
				ret.Id = Id(app.url.ResolveReference(mustParseURL(qu)).String())
				ret.Generator = &Generator{Uri: myselfNamespace, Version: version, Body: "🌺 ShaarliGo"}
				ret.XmlNSShaarliGo = myselfNamespace
				ret.SearchTerms = strings.Join(q, " ") // rather use http://www.opensearch.org/Specifications/OpenSearch/1.1#Example_of_OpenSearch_response_elements_in_Atom_1.0
				ret.XmlNSOpenSearch = "http://a9.com/-/spec/opensearch/1.1/"

				// paging / RFC5005
				clamp := func(x int) int { return min(len(ret.Entries), x) }
				offset = clamp(max(0, offset))
				count := len(ret.Entries)
				ret.Links = append(ret.Links, Link{Rel: relSelf, Href: qu + "&" + "offset" + "=" + strconv.Itoa(offset), Title: strconv.Itoa(1 + offset/limit)})
				if count > limit {
					// Offset of the last non-empty page. Deriving it from count-1 matters
					// when count is an exact multiple of limit: count-(count%limit) would
					// then equal count and point at an empty page one past the end.
					lastOffset := ((count - 1) / limit) * limit
					ret.Links = append(ret.Links, Link{Rel: relFirst, Href: qu, Title: strconv.Itoa(1 + 0)})
					ret.Links = append(ret.Links, Link{Rel: relLast, Href: qu + "&" + "offset" + "=" + strconv.Itoa(lastOffset), Title: strconv.Itoa(1 + lastOffset/limit)})
					if intPrev := offset - limit; intPrev >= 0 {
						ret.Links = append(ret.Links, Link{Rel: relPrevious, Href: qu + "&" + "offset" + "=" + strconv.Itoa(intPrev), Title: strconv.Itoa(1 + intPrev/limit)})
					}
					if intNext := offset + limit; intNext < count {
						ret.Links = append(ret.Links, Link{Rel: relNext, Href: qu + "&" + "offset" + "=" + strconv.Itoa(intNext), Title: strconv.Itoa(1 + intNext/limit)})
					}
					// keep only the requested page
					ret.Entries = ret.Entries[offset:clamp(offset+limit)]
				}
				// prepare entries for Atom publication
				for _, item := range ret.Entries {
					// change entries for output but don't save the change:
					selfURL := mustParseURL(path.Join(uriPub, uriPosts, string(item.Id)) + "/")
					editURL := strings.Join([]string{cgiName, "?post=", selfURL.String()}, "")
					item.Id = Id(app.url.ResolveReference(selfURL).String()) // expand XmlBase as required by https://validator.w3.org/feed/check.cgi?url=
					item.Links = append(item.Links,
						Link{Rel: relSelf, Href: selfURL.String()},
						Link{Rel: relEdit, Href: editURL},
					)
					for i := range item.Categories {
						item.Categories[i].Scheme = catScheme
					}
					if item.Updated.IsZero() {
						item.Updated = item.Published
					}
					// the feed's update time is the newest one among the entries shown
					if item.Updated.After(ret.Updated) {
						ret.Updated = item.Updated
					}
				}
				ret.Categories = AggregateCategories(ret.Entries)
				if ret.Updated.IsZero() {
					// no entries on this page: fall back to the request time
					ret.Updated = iso8601(now)
				}

				w.Header().Set("Content-Type", "text/xml; charset=utf-8")
				enc := xml.NewEncoder(w)
				enc.Indent("", "  ")
				if err := xmlEncodeWithXslt(ret, "../../themes/current/posts.xslt", enc); err == nil {
					if err := enc.Flush(); err == nil {
						return
					}
				}
			}
		}
	}
}

// min returns the smaller of x and y.
func min(x, y int) int {
	smaller := y
	if x < y {
		smaller = x
	}
	return smaller
}
// max returns the larger of x and y.
func max(x, y int) int {
	larger := y
	if x > y {
		larger = x
	}
	return larger
}

// Search returns a copy of feed whose Entries are the result of searchEntries
// applied to the feed's entries with the given ranker. ranker is called once
// per entry.
func (feed Feed) Search(ranker func(*Entry) int) Feed {
	defer un(trace("Feed.Search"))
	result := feed // value receiver: already a copy of the caller's Feed
	result.Entries = searchEntries(feed.Entries, ranker)
	return result
}

// search_results pairs entries with their search rank so both can be sorted
// together via sort.Interface. Ranks[i] is the rank of Entries[i]; the two
// slices are expected to have the same length.
type search_results struct {
	Ranks   []int    // rank per entry, as returned by the ranker
	Entries []*Entry // the entries being ranked, index-aligned with Ranks
}

// Len is the number of ranked entries.
func (r search_results) Len() int {
	return len(r.Ranks)
}

// Less orders higher ranks first; entries of equal rank are ordered by
// ByPublishedDesc.
func (r search_results) Less(i, j int) bool {
	if ri, rj := r.Ranks[i], r.Ranks[j]; ri != rj {
		return ri > rj
	}
	return ByPublishedDesc(r.Entries).Less(i, j)
}

// Swap exchanges positions i and j in both slices so ranks stay aligned with
// their entries.
func (r search_results) Swap(i, j int) {
	rankI, rankJ := r.Ranks[i], r.Ranks[j]
	r.Ranks[i], r.Ranks[j] = rankJ, rankI
	entryI, entryJ := r.Entries[i], r.Entries[j]
	r.Entries[i], r.Entries[j] = entryJ, entryI
}

// searchEntries ranks every entry with ranker, sorts entries in place (highest
// rank first, ties resolved by search_results.Less) and returns the leading
// part of the slice whose rank is greater than 0.
func searchEntries(entries []*Entry, ranker func(*Entry) int) []*Entry {
	ranks := make([]int, len(entries))
	// could be concurrent:
	for i := range entries {
		ranks[i] = ranker(entries[i])
	}
	// sort entries according to rank; Swap keeps ranks and entries aligned
	sort.Sort(search_results{Ranks: ranks, Entries: entries})
	// ranks are now descending: cut at the first entry that did not match
	cut := len(ranks)
	for i, rank := range ranks {
		if rank <= 0 {
			cut = i
			break
		}
	}
	return entries[:cut]
}

//