dial.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. // Copyright 2015 The go-ethereum Authors
  2. // This file is part of the go-ethereum library.
  3. //
  4. // The go-ethereum library is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Lesser General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // The go-ethereum library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Lesser General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Lesser General Public License
  15. // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
  16. package p2p
  17. import (
  18. "container/heap"
  19. "crypto/rand"
  20. "errors"
  21. "fmt"
  22. "net"
  23. "time"
  24. "github.com/ethereum/go-ethereum/log"
  25. "github.com/ethereum/go-ethereum/p2p/discover"
  26. "github.com/ethereum/go-ethereum/p2p/netutil"
  27. )
const (
	// dialHistoryExpiration is the amount of time spent waiting in between
	// redialing a certain node. taskDone uses it to compute the expiry of
	// each dial history entry.
	dialHistoryExpiration = 30 * time.Second

	// lookupInterval throttles discovery lookups: they can only run
	// once every few seconds.
	lookupInterval = 4 * time.Second

	// fallbackInterval is the time that must have passed since the dialer
	// was first used before the bootnodes are dialed when there are no
	// peers at all.
	fallbackInterval = 20 * time.Second

	// Endpoint resolution is throttled with bounded backoff: the delay
	// starts at initialResolveDelay, doubles after every failed resolve
	// and is capped at maxResolveDelay.
	initialResolveDelay = 60 * time.Second
	maxResolveDelay     = time.Hour
)
// NodeDialer is used to connect to nodes in the network, typically by using
// an underlying net.Dialer but also using net.Pipe in tests.
type NodeDialer interface {
	// Dial opens a connection to the given node and returns it, or an
	// error if the connection could not be established.
	Dial(*discover.Node) (net.Conn, error)
}
// TCPDialer implements the NodeDialer interface by using a net.Dialer to
// create TCP connections to nodes in the network.
type TCPDialer struct {
	// The embedded dialer performs the actual connection attempt.
	*net.Dialer
}
  52. // Dial creates a TCP connection to the node
  53. func (t TCPDialer) Dial(dest *discover.Node) (net.Conn, error) {
  54. addr := &net.TCPAddr{IP: dest.IP, Port: int(dest.TCP)}
  55. return t.Dialer.Dial("tcp", addr.String())
  56. }
// dialstate schedules dials and discovery lookups.
// It gets a chance to compute new tasks on every iteration
// of the main loop in Server.run.
type dialstate struct {
	// maxDynDials is the budget of dynamic dials; newTasks subtracts the
	// dynamically dialed peers and the pending dynamic dials from it.
	maxDynDials int
	// ntab is the discovery table. checkDial tolerates a nil table.
	ntab discoverTable
	// netrestrict, when non-nil, restricts dial candidates to the IPs
	// it contains.
	netrestrict *netutil.Netlist

	// lookupRunning is set when a discoverTask is created and cleared
	// again in taskDone.
	lookupRunning bool
	// dialing holds the connection flags of every node that currently
	// has a dial task pending.
	dialing     map[discover.NodeID]connFlag
	lookupBuf   []*discover.Node // current discovery lookup results
	randomNodes []*discover.Node // filled from Table
	// static holds the dial tasks of the static nodes, keyed by node ID.
	static map[discover.NodeID]*dialTask
	// hist remembers recently dialed nodes so they are not redialed
	// before their entry expires.
	hist *dialHistory

	start     time.Time        // time when the dialer was first used
	bootnodes []*discover.Node // default dials when there are no peers
}
// discoverTable is the subset of the discovery table that the dialer
// depends on.
type discoverTable interface {
	// Self returns the node whose ID checkDial compares candidates
	// against to avoid dialing ourselves.
	Self() *discover.Node
	// Close is not called from this file.
	// NOTE(review): presumably shuts the table down; confirm against
	// the implementation.
	Close()
	// Resolve looks up the node with the given ID. Callers treat a nil
	// result as "not found".
	Resolve(target discover.NodeID) *discover.Node
	// Lookup runs a discovery lookup for target and returns the nodes
	// that were found.
	Lookup(target discover.NodeID) []*discover.Node
	// ReadRandomNodes fills the given slice and returns how many entries
	// were written.
	ReadRandomNodes([]*discover.Node) int
}
// dialHistory remembers recent dials. It implements heap.Interface
// ordered by expiration time, so the entry expiring first is at index 0.
type dialHistory []pastDial
// pastDial is an entry in the dial history.
type pastDial struct {
	id  discover.NodeID // node that was dialed
	exp time.Time       // time at which the entry expires
}
// task is a unit of work produced by dialstate.newTasks. The
// implementations in this file are dialTask, discoverTask and
// waitExpireTask.
type task interface {
	// Do executes the task against the given server.
	Do(*Server)
}
// A dialTask is generated for each node that is dialed. Its
// fields cannot be accessed while the task is running.
type dialTask struct {
	// flags are the connection flags passed on when the connection
	// is set up.
	flags connFlag
	// dest is the node to dial. It is replaced when resolve finds a
	// new endpoint.
	dest *discover.Node
	// lastResolved is the time of the most recent resolve attempt.
	lastResolved time.Time
	// resolveDelay is the minimum time between two resolve attempts.
	resolveDelay time.Duration
}
// discoverTask runs discovery table operations.
// Only one discoverTask is active at any time.
// discoverTask.Do performs a random lookup.
type discoverTask struct {
	// results holds the nodes returned by the lookup; taskDone appends
	// them to the lookup buffer of the dial state.
	results []*discover.Node
}
// A waitExpireTask is generated if there are no other tasks
// to keep the loop in Server.run ticking.
type waitExpireTask struct {
	// The embedded duration is how long Do sleeps.
	time.Duration
}
  109. func newDialState(static []*discover.Node, bootnodes []*discover.Node, ntab discoverTable, maxdyn int, netrestrict *netutil.Netlist) *dialstate {
  110. s := &dialstate{
  111. maxDynDials: maxdyn,
  112. ntab: ntab,
  113. netrestrict: netrestrict,
  114. static: make(map[discover.NodeID]*dialTask),
  115. dialing: make(map[discover.NodeID]connFlag),
  116. bootnodes: make([]*discover.Node, len(bootnodes)),
  117. randomNodes: make([]*discover.Node, maxdyn/2),
  118. hist: new(dialHistory),
  119. }
  120. copy(s.bootnodes, bootnodes)
  121. for _, n := range static {
  122. s.addStatic(n)
  123. }
  124. return s
  125. }
  126. func (s *dialstate) addStatic(n *discover.Node) {
  127. // This overwites the task instead of updating an existing
  128. // entry, giving users the opportunity to force a resolve operation.
  129. s.static[n.ID] = &dialTask{flags: staticDialedConn, dest: n}
  130. }
  131. func (s *dialstate) removeStatic(n *discover.Node) {
  132. // This removes a task so future attempts to connect will not be made.
  133. delete(s.static, n.ID)
  134. // This removes a previous dial timestamp so that application
  135. // can force a server to reconnect with chosen peer immediately.
  136. s.hist.remove(n.ID)
  137. }
  138. func (s *dialstate) newTasks(nRunning int, peers map[discover.NodeID]*Peer, now time.Time) []task {
  139. if s.start.IsZero() {
  140. s.start = now
  141. }
  142. var newtasks []task
  143. addDial := func(flag connFlag, n *discover.Node) bool {
  144. if err := s.checkDial(n, peers); err != nil {
  145. log.Trace("Skipping dial candidate", "id", n.ID, "addr", &net.TCPAddr{IP: n.IP, Port: int(n.TCP)}, "err", err)
  146. return false
  147. }
  148. s.dialing[n.ID] = flag
  149. newtasks = append(newtasks, &dialTask{flags: flag, dest: n})
  150. return true
  151. }
  152. // Compute number of dynamic dials necessary at this point.
  153. needDynDials := s.maxDynDials
  154. for _, p := range peers {
  155. if p.rw.is(dynDialedConn) {
  156. needDynDials--
  157. }
  158. }
  159. for _, flag := range s.dialing {
  160. if flag&dynDialedConn != 0 {
  161. needDynDials--
  162. }
  163. }
  164. // Expire the dial history on every invocation.
  165. s.hist.expire(now)
  166. // Create dials for static nodes if they are not connected.
  167. for id, t := range s.static {
  168. err := s.checkDial(t.dest, peers)
  169. switch err {
  170. case errNotWhitelisted, errSelf:
  171. log.Warn("Removing static dial candidate", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP, Port: int(t.dest.TCP)}, "err", err)
  172. delete(s.static, t.dest.ID)
  173. case nil:
  174. s.dialing[id] = t.flags
  175. newtasks = append(newtasks, t)
  176. }
  177. }
  178. // If we don't have any peers whatsoever, try to dial a random bootnode. This
  179. // scenario is useful for the testnet (and private networks) where the discovery
  180. // table might be full of mostly bad peers, making it hard to find good ones.
  181. if len(peers) == 0 && len(s.bootnodes) > 0 && needDynDials > 0 && now.Sub(s.start) > fallbackInterval {
  182. bootnode := s.bootnodes[0]
  183. s.bootnodes = append(s.bootnodes[:0], s.bootnodes[1:]...)
  184. s.bootnodes = append(s.bootnodes, bootnode)
  185. if addDial(dynDialedConn, bootnode) {
  186. needDynDials--
  187. }
  188. }
  189. // Use random nodes from the table for half of the necessary
  190. // dynamic dials.
  191. randomCandidates := needDynDials / 2
  192. if randomCandidates > 0 {
  193. n := s.ntab.ReadRandomNodes(s.randomNodes)
  194. for i := 0; i < randomCandidates && i < n; i++ {
  195. if addDial(dynDialedConn, s.randomNodes[i]) {
  196. needDynDials--
  197. }
  198. }
  199. }
  200. // Create dynamic dials from random lookup results, removing tried
  201. // items from the result buffer.
  202. i := 0
  203. for ; i < len(s.lookupBuf) && needDynDials > 0; i++ {
  204. if addDial(dynDialedConn, s.lookupBuf[i]) {
  205. needDynDials--
  206. }
  207. }
  208. s.lookupBuf = s.lookupBuf[:copy(s.lookupBuf, s.lookupBuf[i:])]
  209. // Launch a discovery lookup if more candidates are needed.
  210. if len(s.lookupBuf) < needDynDials && !s.lookupRunning {
  211. s.lookupRunning = true
  212. newtasks = append(newtasks, &discoverTask{})
  213. }
  214. // Launch a timer to wait for the next node to expire if all
  215. // candidates have been tried and no task is currently active.
  216. // This should prevent cases where the dialer logic is not ticked
  217. // because there are no pending events.
  218. if nRunning == 0 && len(newtasks) == 0 && s.hist.Len() > 0 {
  219. t := &waitExpireTask{s.hist.min().exp.Sub(now)}
  220. newtasks = append(newtasks, t)
  221. }
  222. return newtasks
  223. }
// Reasons for which checkDial rejects a dial candidate.
var (
	errSelf             = errors.New("is self")
	errAlreadyDialing   = errors.New("already dialing")
	errAlreadyConnected = errors.New("already connected")
	errRecentlyDialed   = errors.New("recently dialed")
	errNotWhitelisted   = errors.New("not contained in netrestrict whitelist")
)
  231. func (s *dialstate) checkDial(n *discover.Node, peers map[discover.NodeID]*Peer) error {
  232. _, dialing := s.dialing[n.ID]
  233. switch {
  234. case dialing:
  235. return errAlreadyDialing
  236. case peers[n.ID] != nil:
  237. return errAlreadyConnected
  238. case s.ntab != nil && n.ID == s.ntab.Self().ID:
  239. return errSelf
  240. case s.netrestrict != nil && !s.netrestrict.Contains(n.IP):
  241. return errNotWhitelisted
  242. case s.hist.contains(n.ID):
  243. return errRecentlyDialed
  244. }
  245. return nil
  246. }
  247. func (s *dialstate) taskDone(t task, now time.Time) {
  248. switch t := t.(type) {
  249. case *dialTask:
  250. s.hist.add(t.dest.ID, now.Add(dialHistoryExpiration))
  251. delete(s.dialing, t.dest.ID)
  252. case *discoverTask:
  253. s.lookupRunning = false
  254. s.lookupBuf = append(s.lookupBuf, t.results...)
  255. }
  256. }
  257. func (t *dialTask) Do(srv *Server) {
  258. if t.dest.Incomplete() {
  259. if !t.resolve(srv) {
  260. return
  261. }
  262. }
  263. err := t.dial(srv, t.dest)
  264. if err != nil {
  265. log.Trace("Dial error", "task", t, "err", err)
  266. // Try resolving the ID of static nodes if dialing failed.
  267. if _, ok := err.(*dialError); ok && t.flags&staticDialedConn != 0 {
  268. if t.resolve(srv) {
  269. t.dial(srv, t.dest)
  270. }
  271. }
  272. }
  273. }
  274. // resolve attempts to find the current endpoint for the destination
  275. // using discovery.
  276. //
  277. // Resolve operations are throttled with backoff to avoid flooding the
  278. // discovery network with useless queries for nodes that don't exist.
  279. // The backoff delay resets when the node is found.
  280. func (t *dialTask) resolve(srv *Server) bool {
  281. if srv.ntab == nil {
  282. log.Debug("Can't resolve node", "id", t.dest.ID, "err", "discovery is disabled")
  283. return false
  284. }
  285. if t.resolveDelay == 0 {
  286. t.resolveDelay = initialResolveDelay
  287. }
  288. if time.Since(t.lastResolved) < t.resolveDelay {
  289. return false
  290. }
  291. resolved := srv.ntab.Resolve(t.dest.ID)
  292. t.lastResolved = time.Now()
  293. if resolved == nil {
  294. t.resolveDelay *= 2
  295. if t.resolveDelay > maxResolveDelay {
  296. t.resolveDelay = maxResolveDelay
  297. }
  298. log.Debug("Resolving node failed", "id", t.dest.ID, "newdelay", t.resolveDelay)
  299. return false
  300. }
  301. // The node was found.
  302. t.resolveDelay = initialResolveDelay
  303. t.dest = resolved
  304. log.Debug("Resolved node", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP, Port: int(t.dest.TCP)})
  305. return true
  306. }
// dialError wraps an error returned by the node dialer. dialTask.Do
// uses the type to tell a failed connection attempt apart from a failure
// during connection setup.
type dialError struct {
	error
}
  310. // dial performs the actual connection attempt.
  311. func (t *dialTask) dial(srv *Server, dest *discover.Node) error {
  312. fd, err := srv.Dialer.Dial(dest)
  313. if err != nil {
  314. return &dialError{err}
  315. }
  316. mfd := newMeteredConn(fd, false)
  317. return srv.SetupConn(mfd, t.flags, dest)
  318. }
  319. func (t *dialTask) String() string {
  320. return fmt.Sprintf("%v %x %v:%d", t.flags, t.dest.ID[:8], t.dest.IP, t.dest.TCP)
  321. }
  322. func (t *discoverTask) Do(srv *Server) {
  323. // newTasks generates a lookup task whenever dynamic dials are
  324. // necessary. Lookups need to take some time, otherwise the
  325. // event loop spins too fast.
  326. next := srv.lastLookup.Add(lookupInterval)
  327. if now := time.Now(); now.Before(next) {
  328. time.Sleep(next.Sub(now))
  329. }
  330. srv.lastLookup = time.Now()
  331. var target discover.NodeID
  332. rand.Read(target[:])
  333. t.results = srv.ntab.Lookup(target)
  334. }
  335. func (t *discoverTask) String() string {
  336. s := "discovery lookup"
  337. if len(t.results) > 0 {
  338. s += fmt.Sprintf(" (%d results)", len(t.results))
  339. }
  340. return s
  341. }
  342. func (t waitExpireTask) Do(*Server) {
  343. time.Sleep(t.Duration)
  344. }
  345. func (t waitExpireTask) String() string {
  346. return fmt.Sprintf("wait for dial hist expire (%v)", t.Duration)
  347. }
  348. // Use only these methods to access or modify dialHistory.
  349. func (h dialHistory) min() pastDial {
  350. return h[0]
  351. }
  352. func (h *dialHistory) add(id discover.NodeID, exp time.Time) {
  353. heap.Push(h, pastDial{id, exp})
  354. }
  355. func (h *dialHistory) remove(id discover.NodeID) bool {
  356. for i, v := range *h {
  357. if v.id == id {
  358. heap.Remove(h, i)
  359. return true
  360. }
  361. }
  362. return false
  363. }
  364. func (h dialHistory) contains(id discover.NodeID) bool {
  365. for _, v := range h {
  366. if v.id == id {
  367. return true
  368. }
  369. }
  370. return false
  371. }
  372. func (h *dialHistory) expire(now time.Time) {
  373. for h.Len() > 0 && h.min().exp.Before(now) {
  374. heap.Pop(h)
  375. }
  376. }
  377. // heap.Interface boilerplate
  378. func (h dialHistory) Len() int { return len(h) }
  379. func (h dialHistory) Less(i, j int) bool { return h[i].exp.Before(h[j].exp) }
  380. func (h dialHistory) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
  381. func (h *dialHistory) Push(x interface{}) {
  382. *h = append(*h, x.(pastDial))
  383. }
  384. func (h *dialHistory) Pop() interface{} {
  385. old := *h
  386. n := len(old)
  387. x := old[n-1]
  388. *h = old[0 : n-1]
  389. return x
  390. }