WebArchiverBot.hs

{-| Scans a page of Markdown looking for http links; when found, it appends them to a default file (~/.urls.txt).
This plugin is meant to be run in conjunction with archiver <http://hackage.haskell.org/package/archiver>.
If you do not wish to run it (for example, you have no more than a dozen external http links on any page),
then you should use the original WebArchiver.hs plugin.

Limitations:

* Only parses Markdown, not ReST or any other format; this is because 'readMarkdown'
  is hardwired into it.

By: Gwern Branwen; placed in the public domain -}
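
-- Usage note (a sketch, not part of the original file): gitit loads plugins
-- listed in its config file, so a line along these lines in your gitit.conf
-- should enable this plugin (the path is illustrative):
--
--   plugins: plugins/WebArchiverBot.hs
--
-- The companion `archiver` daemon then consumes the URL list this plugin
-- writes; see its Hackage page for the exact invocation (roughly:
-- `archiver ~/.urls.txt`).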
module WebArchiverBot (plugin) where

import System.Directory (getHomeDirectory)

import Network.Gitit.Interface (liftIO, bottomUpM, Plugin(PreCommitTransform), Inline(Link))
import Text.Pandoc (defaultParserState, readMarkdown)

plugin :: Plugin
plugin = PreCommitTransform archivePage
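
-- PreCommitTransform hands the plugin the raw page source (a String) at commit
-- time; whatever String the action returns is what actually gets committed,
-- which is why archivePage below returns its input unchanged.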

-- archivePage :: (MonadIO m) => String -> m String
archivePage x = do let p = readMarkdown defaultParserState x
                   -- force evaluation and archiving side-effects
                   _p' <- liftIO $ bottomUpM archiveLinks p
                   return x -- note: this is read-only - don't actually change page!

archiveLinks :: Inline -> IO Inline
archiveLinks x@(Link _ ('!':_, _)) = return x -- skip interwiki links
archiveLinks x@(Link _ ('#':_, _)) = return x -- skip section links
archiveLinks x@(Link _ (uln, _)) = do homedir <- getHomeDirectory
                                      appendFile (homedir++"/.urls.txt") (uln++"\n")
                                      return x
archiveLinks x = return x
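
-- A quick sanity check (a sketch, not part of the original plugin): in GHCi,
-- with this module loaded, the following should append the URL to ~/.urls.txt.
-- `Str` is Text.Pandoc's plain-text Inline constructor; the two-argument Link
-- pattern matches the old Pandoc API this plugin targets.
--
-- > archiveLinks (Link [Str "example"] ("http://example.com/", ""))
-- > -- ~/.urls.txt should now end with: http://example.com/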