feedwriter.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. //
  2. // Copyright (C) 2017-2021 Marcus Rohrmoser, http://purl.mro.name/ShaarliGo
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU General Public License
  15. // along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. //
  17. package main
  18. import (
  19. "encoding/json"
  20. "encoding/xml"
  21. "errors"
  22. "fmt"
  23. "io/ioutil"
  24. "log"
  25. "net/url"
  26. "os"
  27. "path"
  28. "path/filepath"
  29. "regexp"
  30. "sort"
  31. "strconv"
  32. "strings"
  33. "time"
  34. )
  35. func mustParseURL(u string) *url.URL {
  36. if ret, err := url.Parse(u); err != nil {
  37. panic("Cannot parse URL '" + u + "' " + err.Error())
  38. } else {
  39. return ret
  40. }
  41. }
  42. const cgiName = "shaarligo.cgi"
  43. const dirThemes = "themes"
  44. const dirApp = "app"
  45. const uriPub = "o"
  46. const uriPosts = "p"
  47. const uriDays = "d"
  48. const uriTags = "t"
  49. const relSelf = Relation("self") // https://www.iana.org/assignments/link-relations/link-relations.xhtml
  50. const relAlternate = Relation("alternate") // https://www.iana.org/assignments/link-relations/link-relations.xhtml
  51. const relVia = Relation("via") // Atom https://tools.ietf.org/html/rfc4287
  52. const relEnclosure = Relation("enclosure") // Atom https://tools.ietf.org/html/rfc4287
  53. const relFirst = Relation("first") // paged feeds https://tools.ietf.org/html/rfc5005#section-3
  54. const relLast = Relation("last") // paged feeds https://tools.ietf.org/html/rfc5005#section-3
  55. const relNext = Relation("next") // paged feeds https://tools.ietf.org/html/rfc5005#section-3
  56. const relPrevious = Relation("previous") // paged feeds https://tools.ietf.org/html/rfc5005#section-3
  57. const relEdit = Relation("edit") // AtomPub https://tools.ietf.org/html/rfc5023
  58. const relEditMedia = Relation("edit-media") // AtomPub https://tools.ietf.org/html/rfc5023
  59. const relUp = Relation("up") // https://www.iana.org/assignments/link-relations/link-relations.xhtml
  60. const relSearch = Relation("search") // http://www.opensearch.org/Specifications/OpenSearch/1.1#Autodiscovery_in_RSS.2FAtom
  61. const newDirPerms = 0775
  62. var rexPath = regexp.MustCompile("[^/]+")
  63. const uriPubPosts = uriPub + "/" + uriPosts + "/"
  64. const uriPubTags = uriPub + "/" + uriTags + "/"
  65. const uriPubDays = uriPub + "/" + uriDays + "/"
  66. func uri2subtitle(subtitle *HumanText, uri string) *HumanText {
  67. if strings.HasPrefix(uri, uriPubTags) {
  68. return &HumanText{Body: "#" + strings.TrimRight(uri[len(uriPubTags):], "/")}
  69. }
  70. if strings.HasPrefix(uri, uriPubDays) {
  71. return &HumanText{Body: "📅 " + strings.TrimRight(uri[len(uriPubDays):], "/")}
  72. }
  73. return subtitle
  74. }
  75. func (entry Entry) FeedFilters(uri2filter map[string]func(*Entry) bool) map[string]func(*Entry) bool {
  76. // defer un(trace("Entry.FeedFilters " + entry.Id))
  77. if nil == uri2filter {
  78. uri2filter = make(map[string]func(*Entry) bool, 10)
  79. }
  80. uri2filter[uriPubPosts] = func(*Entry) bool { return true }
  81. uri2filter[uriPubPosts+string(entry.Id)+"/"] = func(iEntry *Entry) bool { return entry.Id == iEntry.Id }
  82. uri2filter[uriPubTags] = func(*Entry) bool { return false } // dummy to get an (empty) feed
  83. for _, cat := range entry.Categories {
  84. trm := cat.Term
  85. uri2filter[uriPubTags+trm+"/"] = func(iEntry *Entry) bool {
  86. for _, iCat := range iEntry.Categories {
  87. if trm == iCat.Term { // && cat.Scheme == iCat.Scheme {
  88. return true
  89. }
  90. }
  91. return false
  92. }
  93. }
  94. // uri2filter["pub/days/", func(*Entry) bool { return false })
  95. dayStr := entry.Published.Format(time.RFC3339[:10])
  96. uri2filter[uriPubDays+dayStr+"/"] = func(iEntry *Entry) bool {
  97. return dayStr == iEntry.Published.Format(time.RFC3339[:10])
  98. }
  99. return uri2filter
  100. }
  101. func LinkRel(rel Relation, links []Link) Link {
  102. for _, l := range links {
  103. for _, r := range strings.Fields(string(l.Rel)) { // may be worth caching
  104. if rel == Relation(r) {
  105. return l
  106. }
  107. }
  108. }
  109. return Link{}
  110. }
  111. func LinkRelSelf(links []Link) Link {
  112. return LinkRel(relSelf, links)
  113. }
  114. func uriSliceSorted(uri2filter map[string]func(*Entry) bool) []string {
  115. keys := make([]string, len(uri2filter))
  116. {
  117. i := 0
  118. for k := range uri2filter {
  119. keys[i] = k
  120. i++
  121. }
  122. }
  123. sort.Strings(keys) // I don't care too much how they're sorted, I just want them to be stable.
  124. return keys
  125. }
  126. // collect all entries into all (unpaged, complete) feeds to publish.
  127. //
  128. // return sorted by Id
  129. func (seed Feed) CompleteFeeds(uri2filter map[string]func(*Entry) bool) []Feed {
  130. defer un(trace("Feed.CompleteFeeds"))
  131. ret := make([]Feed, 0, len(uri2filter))
  132. for _, uri := range uriSliceSorted(uri2filter) {
  133. entryFilter := uri2filter[uri]
  134. feed := seed // clone
  135. feed.Id = Id(uri)
  136. feed.Subtitle = uri2subtitle(feed.Subtitle, uri)
  137. feed.Entries = nil // save reallocs?
  138. for _, entry := range seed.Entries {
  139. if entryFilter(entry) {
  140. feed.Entries = append(feed.Entries, entry)
  141. }
  142. }
  143. if uriPubTags == uri {
  144. feed.Categories = AggregateCategories(seed.Entries) // rather the ones from o/p
  145. }
  146. ret = append(ret, feed)
  147. }
  148. return ret
  149. }
  150. func appendPageNumber(prefix string, page, pageCount int) string {
  151. if !strings.HasSuffix(prefix, "/") {
  152. panic("invalid input: appendPageNumber('" + prefix + "', " + fmt.Sprint(page) + ") needs a trailing slash")
  153. }
  154. if page == pageCount-1 {
  155. return prefix
  156. }
  157. return fmt.Sprintf("%s"+"-"+"%d"+"/", prefix[:len(prefix)-1], page)
  158. }
  159. func computePageCount(count int, entriesPerPage int) int {
  160. if count == 0 {
  161. // even 0 entries need one (empty) page
  162. return 1
  163. }
  164. return 1 + (count-1)/entriesPerPage
  165. }
// Pages splits seed into per-page feeds of at most entriesPerPage entries
// each and adds the RFC 5005 paged-feed navigation links.
//
// The highest page number holds the head of seed.Entries and is published
// under the bare feed URL (see appendPageNumber); page 0 holds the tail.
// NOTE(review): with seed.Entries sorted newest-first (ByPublishedDesc, see
// PublishFeedsForModifiedEntries) that makes page 0 the oldest page, matching
// the rel=last/first comments below — confirm callers always sort first.
func (seed Feed) Pages(entriesPerPage int) []Feed {
	// defer un(trace("Feed.Pages " + seed.Id))
	entriesPerPage = max(1, entriesPerPage)
	totalEntries := len(seed.Entries)
	pageCount := computePageCount(totalEntries, entriesPerPage)
	ret := make([]Feed, 0, pageCount)
	uri := string(seed.Id)
	// link builds a navigation Link to page; Title is the 1-based page number.
	link := func(rel Relation, page int) Link {
		return Link{Rel: rel, Href: appendPageNumber(uri, page, pageCount), Title: strconv.Itoa(page + 1)}
	}
	lower := totalEntries // start past the oldest entry
	for page := 0; page < pageCount; page++ {
		feed := seed
		{
			// carve feed.Entries out of seed.Entries, walking from the
			// tail (page 0) towards the head (page pageCount-1)
			upper := lower
			step := entriesPerPage
			if page == pageCount-2 {
				// only the page BEFORE the last one has variable length (if needed)
				step = totalEntries % entriesPerPage
				if 0 == step {
					step = entriesPerPage
				}
			}
			lower = max(0, upper-step)
			feed.Entries = seed.Entries[lower:upper]
			feed.Updated = iso8601(time.Time{}) // start with zero
			for _, ent := range feed.Entries { // max of entries
				if feed.Updated.Before(ent.Updated) {
					feed.Updated = ent.Updated
				}
			}
		}
		// copy the links and add self + paging navigation
		ls := append(make([]Link, 0, len(feed.Links)+5), feed.Links...)
		ls = append(ls, link(relSelf, page))
		// https://tools.ietf.org/html/rfc5005#section-3
		if pageCount > 1 {
			ls = append(ls, link(relLast, 0)) // oldest, i.e. lowest page number
			if page > 0 {
				ls = append(ls, link(relNext, page-1)) // older, i.e. smaller page number
			}
			if page < pageCount-1 {
				ls = append(ls, link(relPrevious, page+1)) // newer, i.e. higher page number
			}
			ls = append(ls, link(relFirst, pageCount-1)) // newest, i.e. largest page number
		} else {
			// TODO https://tools.ietf.org/html/rfc5005#section-2
			// xmlns:fh="http://purl.org/syndication/history/1.0" <fh:complete/>
		}
		feed.Links = ls
		ret = append(ret, feed)
	}
	return ret
}
  219. func (feed Feed) CompleteFeedsForModifiedEntries(entries []*Entry) []Feed {
  220. // defer un(trace("Feed.CompleteFeedsForModifiedEntries"))
  221. var uri2filter map[string]func(*Entry) bool
  222. for _, entry := range entries {
  223. uri2filter = entry.FeedFilters(uri2filter)
  224. }
  225. if feed.Updated.IsZero() {
  226. feed.Updated = func() iso8601 {
  227. if len(feed.Entries) > 0 {
  228. ent := feed.Entries[0]
  229. if !ent.Updated.IsZero() {
  230. return ent.Updated
  231. }
  232. if !ent.Published.IsZero() {
  233. return ent.Published
  234. }
  235. }
  236. return iso8601(time.Now())
  237. }()
  238. }
  239. return feed.CompleteFeeds(uri2filter)
  240. }
// PagedFeeds expands each complete feed into its pages (see Feed.Pages) and
// decorates every entry of feed for output: absolute Id, self/edit/up links
// and the category scheme URL.
//
// feed.XmlBase must be an absolute URL with a trailing slash, otherwise an
// error is returned.
//
// NOTE(review): feed.Entries holds pointers, so the per-entry mutations below
// are visible to the caller as well — intentionally "for output but don't
// save the change" (the feed is not persisted from here).
func (feed Feed) PagedFeeds(complete []Feed, linksPerPage int) ([]Feed, error) {
	defer un(trace("Feed.PagedFeeds"))
	xmlBase := mustParseURL(string(feed.XmlBase))
	if !xmlBase.IsAbs() || !strings.HasSuffix(xmlBase.Path, "/") {
		log.Printf("xml:base is '%s'\n", xmlBase)
		return []Feed{}, errors.New("feed/@xml:base must be set to an absolute URL with a trailing slash but not '" + xmlBase.String() + "'")
	}
	pages := make([]Feed, 0, 2*len(complete))
	for _, comp := range complete {
		pages = append(pages, comp.Pages(linksPerPage)...)
	}
	// do before writing but after all matching is done:
	// category scheme, e.g. "<base>o/t/"
	catScheme := Iri(xmlBase.ResolveReference(mustParseURL(path.Join(uriPub, uriTags))).String() + "/")
	for _, entry := range feed.Entries {
		entry.XmlBase = Iri(xmlBase.String())
		if entry.Updated.IsZero() {
			entry.Updated = entry.Published
		}
		// change entries for output but don't save the change:
		upURL := mustParseURL(path.Join(uriPub, uriPosts) + "/")
		selfURL := mustParseURL(path.Join(uriPub, uriPosts, string(entry.Id)) + "/")
		editURL := strings.Join([]string{cgiName, "?post=", selfURL.String()}, "")
		entry.Id = Id(xmlBase.ResolveReference(selfURL).String()) // expand XmlBase as required by https://validator.w3.org/feed/check.cgi?url=
		entry.Links = append(entry.Links,
			Link{Rel: relSelf, Href: selfURL.String()},
			Link{Rel: relEdit, Href: editURL},
			// Link{Rel: relEditMedia, Href: editURL},
			Link{Rel: relUp, Href: upURL.String(), Title: feed.Title.Body}, // we need the feed-name somewhere.
		)
		for i := range entry.Categories {
			entry.Categories[i].Scheme = catScheme
		}
	}
	return pages, nil
}
  276. func (app Server) PublishFeedsForModifiedEntries(feed Feed, entries []*Entry) error {
  277. defer un(trace("App.PublishFeedsForModifiedEntries"))
  278. feed.Generator = &Generator{Uri: myselfNamespace, Version: version, Body: "🌺 ShaarliGo"}
  279. sort.Sort(ByPublishedDesc(feed.Entries))
  280. // entries = feed.Entries // force write all entries. Every single one.
  281. complete := feed.CompleteFeedsForModifiedEntries(entries)
  282. if pages, err := feed.PagedFeeds(complete, app.cfg.LinksPerPage); err == nil {
  283. if err = app.PublishFeeds(pages, true); err != nil {
  284. return err
  285. } else {
  286. // just assure ALL entries index.xml.gz exist and are up to date
  287. for _, ent := range feed.Entries {
  288. if err = app.PublishEntry(ent, false); err != nil { // only if newer
  289. return err
  290. }
  291. }
  292. return nil
  293. }
  294. } else {
  295. return err
  296. }
  297. }
// PublishFeeds writes all feeds via PublishFeed, serialized behind a pid
// lock file (app/var/lock) to avoid races. For the tags feed it additionally
// writes o/t/index.json listing all "#term" category strings.
func (app Server) PublishFeeds(feeds []Feed, force bool) error {
	defer un(trace("App.PublishFeeds"))
	strFileLock := filepath.Join(dirApp, "var", "lock")
	// A stale lock file holds the pid of a previous writer: try to kill that
	// process and remove the lock before proceeding.
	// check race: if .lock exists kill pid?
	if byPid, err := ioutil.ReadFile(strFileLock); err == nil {
		// NOTE(review): the err declared by Atoi below shadows the outer one,
		// so Atoi/Kill failures never reach the `if err != nil` check — they
		// are silently ignored. Confirm whether that is intended.
		if pid, err := strconv.Atoi(string(byPid)); err == nil {
			if proc, er := os.FindProcess(pid); er == nil {
				err = proc.Kill()
			}
		}
		if err != nil {
			return err
		}
		if err = os.Remove(strFileLock); err != nil {
			return err
		}
	}
	// create .lock file with pid
	// NOTE(review): os.ModeExclusive is a mode flag, not a permission value —
	// TODO confirm the intended file permissions. A WriteFile failure falls
	// through to `return nil` without publishing anything.
	if err := ioutil.WriteFile(strFileLock, []byte(fmt.Sprint(os.Getpid())), os.ModeExclusive); err == nil {
		defer os.Remove(strFileLock)
		for _, feed := range feeds {
			if err := app.PublishFeed(feed, force); err != nil {
				return err
			}
			if uriPubTags == LinkRelSelf(feed.Links).Href {
				// write additional index.json with all (public) category terms
				const jsonFileName = "index.json"
				tags := make([]string, 0, len(feed.Categories))
				for _, cat := range feed.Categories {
					tags = append(tags, "#"+cat.Term)
				}
				dstDirName := filepath.FromSlash(uriPubTags)
				dstFileName := filepath.Join(dstDirName, jsonFileName)
				tmpFileName := dstFileName + "~"
				var w *os.File
				// NOTE(review): Create/Encode/Close errors here are not
				// returned (best effort?), and this defer runs only at
				// function exit — one open fd per loop iteration until then.
				if w, err = os.Create(tmpFileName); err == nil {
					defer w.Close() // just to be sure
					enc := json.NewEncoder(w)
					if err = enc.Encode(tags); err == nil {
						if err = w.Close(); err == nil {
							// atomically move the finished file into place
							if err := os.Rename(tmpFileName, dstFileName); err != nil {
								return err
							}
						}
					}
				}
			}
		}
	}
	return nil
}
// PublishFeed writes one feed page as <uri>/index.xml, rendered through the
// current theme's posts.xslt, where uri comes from the feed's rel=self link.
//
// Empty deepest-level feeds ("../../../" prefix) are removed from disk
// instead. Single-entry pages below o/p/ are skipped here entirely — see
// PublishEntry. Unless force is set, a destination whose mtime is not older
// than feed.Updated is left untouched.
func (app Server) PublishFeed(feed Feed, force bool) error {
	const feedFileName = "index.xml"
	const xsltFileName = "posts.xslt"
	uri := LinkRelSelf(feed.Links).Href
	ti, to := trace(strings.Join([]string{"App.PublishFeed", uri}, " "))
	// replace each URI segment with ".." to get the relative path to the root
	pathPrefix := rexPath.ReplaceAllString(uri, "..")
	dstDirName := filepath.FromSlash(uri)
	dstFileName := filepath.Join(dstDirName, feedFileName)
	// deepest-level feeds that became empty (or hold a single never-published
	// entry) are deleted rather than written
	remove := ((1 == len(feed.Entries) && feed.Entries[0].Published.IsZero()) ||
		0 == len(feed.Entries)) &&
		"../../../" == pathPrefix
	if remove {
		log.Printf("remove %s", dstFileName)
		err := os.Remove(dstFileName)
		os.Remove(dstDirName) // best effort; fails while the dir is non-empty
		defer un(ti, to)
		return err
	}
	// expand xml:base so the Id is absolute
	feed.Id = Id(string(feed.XmlBase) + string(feed.Id))
	mTime := time.Time(feed.Updated)
	var feedOrEntry interface{} = feed
	if "../../../" == pathPrefix && strings.HasPrefix(uri, uriPubPosts) {
		if 0 == len(feed.Entries) {
			return fmt.Errorf("Invalid feed, self: %v len(entries): %d", uri, len(feed.Entries))
		}
		if 1 < len(feed.Entries) {
			log.Printf("%d entries with Id: %v, keeping just one.", len(feed.Entries), uri)
		}
		ent := feed.Entries[0]
		feedOrEntry = ent
		mTime = time.Time(ent.Updated)
		// NOTE(review): the assignments to feedOrEntry and mTime above are
		// dead — the early return below skips single-entry pages entirely.
		return nil // do nothing. Single entries should be published differently.
	}
	// skip when the file on disk is already at least as new as the feed
	if fi, err := os.Stat(dstFileName); !force && (fi != nil && !fi.ModTime().Before(mTime)) && !os.IsNotExist(err) {
		// log.Printf("skip %s, still up to date.", dstFileName)
		return err
	}
	defer un(ti, to)
	tmpFileName := dstFileName + "~"
	xslt := path.Join(pathPrefix, dirThemes, "current", xsltFileName)
	var err error
	if err = os.MkdirAll(dstDirName, newDirPerms); err == nil {
		var w *os.File
		if w, err = os.Create(tmpFileName); err == nil {
			defer w.Close() // just to be sure
			enc := xml.NewEncoder(w)
			enc.Indent("", " ")
			if err = xmlEncodeWithXslt(feedOrEntry, xslt, enc); err == nil {
				if err = enc.Flush(); err == nil {
					if err = w.Close(); err == nil {
						// stamp the file with the feed's time, then move it
						// into place atomically
						os.Chtimes(tmpFileName, mTime, mTime)
						return os.Rename(tmpFileName, dstFileName)
					}
				}
			}
		}
	}
	return err
}
  409. func (app Server) PublishEntry(ent *Entry, force bool) error {
  410. const feedFileName = "index.xml"
  411. const xsltFileName = "posts.xslt"
  412. uri := LinkRelSelf(ent.Links).Href
  413. ti, to := trace(strings.Join([]string{"App.PublishEntry", uri}, " "))
  414. pathPrefix := rexPath.ReplaceAllString(uri, "..")
  415. dstDirName := filepath.FromSlash(uri)
  416. dstFileName := filepath.Join(dstDirName, feedFileName)
  417. var feedOrEntry interface{} = ent
  418. ent.Id = Id(string(ent.XmlBase) + string(ent.Id))
  419. mTime := time.Time(ent.Updated)
  420. if fi, err := os.Stat(dstFileName); !force && (fi != nil && !fi.ModTime().Before(mTime)) && !os.IsNotExist(err) {
  421. // log.Printf("skip %s, still up to date.", dstFileName)
  422. return err
  423. }
  424. defer un(ti, to)
  425. tmpFileName := dstFileName + "~"
  426. xslt := path.Join(pathPrefix, dirThemes, "current", xsltFileName)
  427. var err error
  428. if err = os.MkdirAll(dstDirName, newDirPerms); err == nil {
  429. var w *os.File
  430. if w, err = os.Create(tmpFileName); err == nil {
  431. enc := xml.NewEncoder(w)
  432. enc.Indent("", " ")
  433. if err = xmlEncodeWithXslt(feedOrEntry, xslt, enc); err == nil {
  434. if err = enc.Flush(); err == nil {
  435. if err = w.Close(); err == nil {
  436. os.Chtimes(tmpFileName, mTime, mTime)
  437. return os.Rename(tmpFileName, dstFileName)
  438. }
  439. }
  440. }
  441. }
  442. }
  443. return err
  444. }