123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110 |
#!/usr/bin/python
# Title : alexa.py
# Description: Alexa Site Overview parser
# Author : linuxitux
# Date : 22-08-2016
# Usage : ./alexa.py
# Notes : Edit blogsf accordingly
"""Fetch the Alexa rank of every domain listed in ``blogsf``.

The ranked sites are printed to stdout as an HTML table (sorted by rank) and
also saved as ``site,rank`` lines in a dated file next to ``ranksf``.
Problems with individual sites are reported on stderr.
"""
import datetime
import operator
import os
import sys
import time

try:
    import urllib2
except ImportError:
    # Python 3: urllib.request offers the Request/urlopen names used below.
    import urllib.request as urllib2

# File containing domains, one per line ("~" is expanded when it is opened)
blogsf = "~/blogs.txt"

# Prefix of the output file; the current date (YYYY-MM-DD) is appended
ranksf = "~/ranks_"

# Alexa Site Overview URL
url = "http://www.alexa.com/siteinfo/"

# Please, be gentle: seconds to wait after each request
sleeptime = 3

# Rank delimiters: the rank is the text after substr3 inside the first
# substr1 ... substr2 element of the page
substr1 = "<strong "
substr2 = "</strong"
substr3 = "-->"

# Style block printed before the table
styleblock = """
<style>
.alexaranks {
font-family: monospace;
font-size: 13px;
color: #333;
}
a, a:active, a:focus, a:visited {
color: #2c9c30;
}
</style>
"""


def clean_sites(lines):
    """Return the site names found in *lines*, stripped, without blank lines."""
    stripped = (line.strip() for line in lines)
    return [site for site in stripped if site]


def read_sites(path):
    """Read the list of sites (one per line) from *path*.

    A leading "~" in *path* is expanded to the user's home directory,
    because open() does not do that by itself.
    """
    with open(os.path.expanduser(path)) as blogs:
        return clean_sites(blogs)


def fetch_page(site):
    """Download the Alexa overview page for *site* and return it as text.

    Errors raised by urlopen()/read() are not handled here; the caller
    decides what to do with them.
    """
    response = urllib2.urlopen(urllib2.Request(url=url + site))
    try:
        body = response.read()
    finally:
        response.close()
    # Python 3 returns bytes; Python 2 already returns str.
    if not isinstance(body, str):
        body = body.decode("utf-8", "replace")
    return body


def parse_rank(page):
    """Extract the rank from the HTML *page*.

    Returns the rank as an int, or None when any delimiter is missing or
    the delimited text is not a number.
    """
    try:
        chunk = page[page.index(substr1):]
        chunk = chunk[:chunk.index(substr2)]
        chunk = chunk[chunk.index(substr3) + len(substr3):]
        # Drop all whitespace and the thousands separators before converting
        return int("".join(chunk.split()).replace(",", ""))
    except ValueError:
        # Raised by str.index (delimiter not found) and by int (not a number)
        return None


def ranked_rows(results):
    """Return the (site, rank) pairs with rank > 0, sorted by ascending rank."""
    by_rank = sorted(results, key=operator.itemgetter(1))
    return [row for row in by_rank if row[1] > 0]


def render_table(ranked, updated):
    """Build the HTML output for the already sorted *ranked* rows.

    *updated* is the date text shown above the table. The returned string
    ends with a newline.
    """
    lines = [
        styleblock,
        '<p class="alexaranks"><i>Actualizado: ' + updated + '</i></p>',
        '<table id="alexaranks" class="alexaranks">',
    ]
    for position, (site, rank) in enumerate(ranked, 1):
        lines.append(
            '<tr><td>' + str(position) + '</td><td><a href="http://' + site
            + '/">' + site + '</a></td><td style="text-align: right;">'
            + "{:,}".format(rank) + '</td></tr>'
        )
    lines.append('</table>')
    lines.append('<p> </p>')
    return "\n".join(lines) + "\n"


def main():
    """Collect the ranks, print the HTML table and save the dated ranks file."""
    # One date for both the file name and the page, so they always agree
    today = datetime.date.today()

    results = []
    for site in read_sites(blogsf):
        rank = None
        try:
            page = fetch_page(site)
        except IOError as err:
            # One unreachable site should not discard the ranks already
            # collected; report it and treat the site as unranked.
            sys.stderr.write(site + " could not be fetched: " + str(err) + "\n")
        else:
            rank = parse_rank(page)
            if rank is None:
                sys.stderr.write(site + " has no rank\n")
        # Unranked sites are stored with rank 0 and filtered out later
        results.append((site, rank or 0))
        time.sleep(sleeptime)

    ranked = ranked_rows(results)
    sys.stdout.write(render_table(ranked, today.strftime('%d/%m/%Y')))

    # The file is opened only now, so a failed run does not truncate it
    outpath = os.path.expanduser(ranksf + today.strftime("%Y-%m-%d"))
    with open(outpath, "w") as outfile:
        outfile.writelines(
            site + "," + str(rank) + "\n" for site, rank in ranked
        )


if __name__ == "__main__":
    main()
|