# referer-related configuration

# IP addresses of Google's cache of web pages.  They are appended to
# EXCLUDEDREFERERS at the end of this file.
GOOGLECACHE = ['216.239.37.100', '216.239.39.100', '216.239.57.100']

# Host names of search engines that will show up as referers.  They are
# appended to EXCLUDEDREFERERS at the end of this file.
SEARCHENGINES = [
    'alltheweb.com',
    'altavista.com',
    'aolsearch.aol.com',
    'ask.com',
    'comcast.net',
    'google.com',
    'metacrawler.com',
    'msxml.excite.com',
    'ms101.mysearch.com',
    'new.search.yahoo.com',
    'search.cometsystems.com',
    'search.iwon.com',
    'search.ke.voila.fr',
    'search.metacrawler.com',
    'search.msn.com',
    'search.msn.co.za',
    'search.netscape.com',
    'search.ninemsn.com.au',
    'search.virgilio.it',
    'search.yahoo.com',
    'software.startnow.com',
    'sidesearch.lycos.com',
    'uk.search.yahoo.com',
    'webferret.search.com',
    'websearch.cnn.com',
    'web.ask.com',
]

# Referer entries for this site itself.  Appended to EXCLUDEDREFERERS at
# the end of this file.
MYREFERER = ['nbm.rucus.net']

# Referer spammers; none are listed by default.  Also appended to
# EXCLUDEDREFERERS at the end of this file.
REFERERSPAMMERS = []

# IPs which should be excluded from all processing.
#
# NOTE(review): the comment that used to sit here also read "referers to
# exclude from referer reports - include your web site, its IPs, and any
# 'referer spamming' you have".  That wording appears to describe
# EXCLUDEDREFERERS (assembled at the end of this file) rather than this
# list - confirm and move it if so.
EXCLUDEIPS = ['196.7.14.18']

# Maps an IP address to the name that stands in for it.
IP_TO_NAME = {'196.7.0.164': 'UUNet cache'}

# Search strings that are left out of search processing.  Variants with
# an unbalanced or balanced pair of double quotes are listed separately.
EXCLUDESEARCHES = [
    'fire daemon',
    '"fire daemon',
    'fire+daemon',
    '"fire daemon"',
]

# Known aggregators
#
# Maps an identifying string to the aggregator's display name.  The keys
# are presumably looked for inside the request's agent string (the
# matching code is not in this file).  Every entry is also merged into
# BOTS further down.
AGGREGATORS = {
    'http://www.methodize.org/nntprss/': 'NNTP/RSS',
    'tnntprss/': 'tnntprss',
    'rssSearch Harvester/': 'rssSearch',
    'Hep/': 'Hep',
    'Frontier/': 'Frontier',
    # Was misspelled 'htpp://', which can never occur in a real URL.
    'http://www.blogpulse.com/': 'BlogPulse',
}

# Bots that archive/index
#
# Maps an identifying string to the bot's display name; several strings
# may share one display name (e.g. 'Google', 'Fast', 'larbin', 'Java').
# The keys are presumably looked for inside the request's agent string
# (the matching code is not in this file).
#
# Each key must appear only once: in a dict literal a repeated key
# silently overrides the earlier entry.
BOTS = {
    'Googlebot/': 'Google',
    'TurnitinBot/': 'TurnitinBot',
    'Googlebot-Image/': 'Google',
    'Openbot/': 'Openbot',
    'Wget/': 'wget',
    'curl/': 'curl',
    'ia_archiver': 'ia_archiver',
    'Indy Library': 'Indy Library',
    'RPT-HTTPClient/': 'RPT-HTTPClient',
    'bookwatch@onfocus.com': 'bookwatch@onfocus.com',
    'Ask Jeeves': 'Ask Jeeves',
    'http://brainoff.com/geoblog/': 'GeoBlog',
    'http://www.inktomi.com/slurp.html': 'Inktomi',
    'http://fast.no/support/crawler.asp': 'Fast',
    'http://www.picsearch.com/bot.html': 'Picsearch.com',
    'organica/': 'organica',
    'Python-urllib/': 'Python-urllib',
    'libwww-perl/': 'libwww-perl',
    'http://www.almaden.ibm.com/cs/crawler': 'Almaden',
    'Microsoft-WebDAV-MiniRedir/': 'MS WebDAV',
    'daypopbot/': 'daypopbot',
    'sitecheck.internetseer.com': 'sitecheck.internetseer.com',
    'Scooter/': 'Scooter',
    'FAST-WebCrawler/': 'Fast',
    'dloader(NaverRobot)/': 'NaverRobot',
    'larbin_': 'larbin',
    'timboBot/': 'timboBot',
    'perl': 'perl',
    'MicrosoftPrototypeCrawler': 'MicrosoftPrototypeCrawler',
    'NITLE Blog Spider/': 'NITLE Blog Spider',
    'EasyDL/': 'EasyDL',
    'Gigabot/': 'Gigabot',
    'Blogosphere/': 'Blogosphere',
    'LARBIN-EXPERIMENTAL': 'larbin',
    'Crit/': 'Crit',
    'CFNetwork/': 'CFNetwork',
    'Zao/': 'Zao',
    'QuepasaCreep': 'QuepasaCreep',
    'ZyBorg/': 'ZyBorg',
    'grub-client-': 'grub',
    'Tcl http client package': 'tcl',
    'UnknownLWPApp/': 'LWP',
    'BlogShares Bot/': 'BlogShares',
    # Exact browser-looking agent strings that are classified as bots.
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows 98) XX': 'Unknown',
    'Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)': 'Unknown',
    'Mozilla/4.0 (compatible; MSIE 5.00; Windows 98': 'Unknown',
    'http://search.msn.com/msnbot.htm': 'MSNBot',
    'Java1.': 'Java',
    'Java/1.': 'Java',
    'NPBot (http://www.nameprotect.com/botinfo.html)': 'NameProtect Bot',
    'Genome Machine': 'Genome Machine',
    'BlogPulse': 'Intelliseek BlogPulse',
    'Netcraft Web Server Survey': 'Netcraft',
    'HTTrack 3': 'HTTrack',
    # NOTE(review): the label is spelled 'Exaload' while the key says
    # 'Exalead' - left unchanged, confirm which one is intended.
    'Exalead NG/MimeLive Client': 'Exaload',
    'Computer_and_Automation_Research_Institute_Crawler': 'CARI Crawler',
    'http://cis.poly.edu/polybot/': 'PolyBot',
    'lwp-request/': 'lwp-request',
    'Waypath Scout': 'Waypath Scout',
    'BlogVisionEye/': 'BlogVisionEye',
    'obidos-bot': 'obidos-bot',
    'Speedy Spider (http://www.entireweb.com)': 'entireweb',
    'nhnbot@naver.com': 'nhnbot',
    'spider@goliat.hu': 'goliatspider263',
    'EARTHCOM.info/': 'earthcom.info',
    'BravoBrian SpiderEngine MarcoPolo': 'BravoBrian',
    'http://www.tsgk.net': 'Transgenikbot',
    'StarProse Referrer Advertising System': 'StarProse',
    'Technoratibot/': 'technoratibot',
    'DigExt': 'IE "make available online"',
    'sherlock_spider': 'Sherlock Spider',
    'Yahoo! Slurp': 'Yahoo! Slurp',
    'http://www.alltheweb.com/help/webmaster/crawler': 'All The Web Crawler (Yahoo!)',
    'XML::RSS::uptimeRSSFetch': 'XML::RSS::uptimeRSSFetch',
    'FeedValidator/': 'FeedValidator',
    'CrawlConvera': 'CrawlConvera',
    'Mozilla/4.08': 'Unknown',
}

# Merge every AGGREGATORS entry into BOTS.  Where both dicts hold the
# same key, the AGGREGATORS value replaces the one in BOTS.
BOTS.update(AGGREGATORS)

# URL fragments on this site that are ignored - for the most part these
# are IIS exploit attempts.
IGNORE_URLS = [
    'default.ida',
    '/MSADC/',
    '/msadc/',
    '/scripts/',
    '/script/',
    '/winnt/',
    'cmd.exe',
    'formmail',
    'GET http://',
    'SEARCH',
    '.asp',
    '.dll',
    '.vts',
    '.exe',
    '/cgi-bin/',  # Might not want to ignore this
    '/cgi-win/',
    '/cgi-dos/',
    'FormMail',
    '/null.id',
    '.php',  # Might not want to ignore this
    '/_vti_',
    '/cfdocs/',
    '/code/faqmanager.cgi',
    '/ftp.pl',
    '/manual/',
    '/quote.html',
    '/files/DNS_Firewall_Infr.doc',
    '/files/internal.for.bje',
]

# Response codes (as strings) that are excluded from any processing.
EXCLUDEDRESPONSES = [
    # Redirects - assuming we don't care about them.
    '301',
    '302',
    '400',
    '401',
    '403',
    # '404' is deliberately absent: vellum's 404 handler stuff means
    # 404s can't be ignored.
    '405',
    '500',
    '501',
]

# Longest reference shown, so that over-long URLs don't end up in tables.
MAXREFLENGTH = 90

# Longest "key" name, to prevent graphic problems.
MAXGRAPHNAMELENGTH = 40

# Reports to create
#
# Each entry is a dict describing one report:
#
#   "variable"   data["variable"] to examine
#   "filename"   HTML filename for the report
#   "title"      title of the report
#   "key"        table header for the name
#   "value"      table header for the value
#   "image"      the graph to create, if any.  None means no image.
#   "method"     either "Top" or "Percentage"; left out by some of the
#                reports that have no image
#   "top"        how many entries to put on the graph
#   "threshold"  the percentage threshold over which to have an entry on
#                the graph
#
# "top" and "threshold" are optional; not every report sets both.
REPORTS = [
    {
        "variable": "refbysite",
        "filename": "referersbysite.html",
        "title": "Referers by Site",
        "key": "Sites",
        "value": "Referrals",
        "image": "referersbysite.png",
        "method": "Top",
        "top": 10,
    },
    {
        "variable": "browsers",
        "filename": "browsers.html",
        "title": "Visitors by Browser",
        "key": "Browser",
        "value": "Visitors",
        "image": "visitorsbybrowser.png",
        "method": "Percentage",
        "threshold": 1.5,
    },
    {
        "variable": "os",
        "filename": "os.html",
        "title": "Visitors by Operating System",
        "key": "Operating System",
        "value": "Visitors",
        "image": "visitorsbyos.png",
        "method": "Percentage",
        "threshold": 1.5,
    },
    {
        "variable": "urls",
        "filename": "urls.html",
        "title": "Top URLs",
        "key": "URL",
        "value": "Visits",
        "image": "urls.png",
        "method": "Percentage",
        "top": 20,
        "threshold": 3,
    },
    {
        "variable": "downloadurls",
        "filename": "downloads.html",
        "title": "Downloads",
        "key": "File",
        "value": "Downloads",
        "image": "downloads.png",
        "method": "Percentage",
        "top": 20,
        "threshold": 1.5,
    },
    {
        "variable": "ips",
        "filename": "visitors.html",
        "title": "Biggest visitors",
        "key": "Visitor",
        "value": "Visits",
        "image": "visitors.png",
        "method": "Top",
        "top": 15,
        "threshold": 2,
    },
    {
        "variable": "searches",
        "filename": "searches.html",
        "title": "Searches",
        "key": "Search Terms",
        "value": "Searches",
        "image": None,
    },
    {
        "variable": "searchesbyengine",
        "filename": "searchesbyengine.html",
        "title": "Searches by engine",
        "key": "Search Engine",
        "value": "Number of searches",
        "image": "searchesbyengine.png",
        "method": "Percentage",
        "threshold": 1.5,
    },
    {
        "variable": "countries",
        "filename": "countries.html",
        "title": "Visitors by Country",
        "key": "Country",
        "value": "Visitors",
        "image": "countries.png",
        "method": "Top",
        "top": 10,
        "threshold": 1.5,
    },
    {
        "variable": "visitsbybot",
        "filename": "visitsbybot.html",
        "title": "Visits by Bot",
        "key": "Bot",
        "value": "Visits",
        "image": "visitsbybot.png",
        "method": "Top",
        "top": 10,
        "threshold": 1.5,
    },
    {
        "variable": "refbypage",
        "filename": "referers.html",
        "title": "Referers",
        "key": "Referring URL",
        "value": "Referrals",
        "image": None,
    },
    {
        "variable": "unknownbrowsers",
        "filename": "unknownbrowsers.html",
        "title": "Visitors by Unknown Browser",
        "key": "Unknown Browser",
        "value": "Visitors",
        "image": None,
        "method": "Percentage",
        "threshold": 1.5,
    },
    {
        "variable": "unknownos",
        "filename": "unknownos.html",
        "title": "Visitors by Unknown Operating System",
        "key": "Unknown Operating System",
        "value": "Visitors",
        "image": None,
        "method": "Percentage",
        "threshold": 1.5,
    },
    {
        "variable": "mozbrowsers",
        "filename": "mozbrowsers.html",
        "title": "Visitors by Mozilla Browser",
        "key": "Mozilla Browser",
        "value": "Visitors",
        "image": None,
        "method": "Percentage",
        "threshold": 1.5,
    },
    {
        "variable": "entry_pages",
        "filename": "entry_pages.html",
        "title": "Visits by Entry Page",
        "key": "Entry Page",
        "value": "Visits",
        "image": "entry_pages.png",
        "method": "Percentage",
        "threshold": 1.5,
    },
]

# Opening part of every HTML file.  It holds a single %s placeholder,
# located inside the <title> element.
#
# NOTE(review): there is no opening <html> tag here although TEMPLATE_END
# closes one - confirm whether the tag is written elsewhere.
TEMPLATE_START = (
    '\n'
    '  <head>\n'
    '    <title>%s</title>\n'
    '    <link rel="stylesheet" href="nbm.css" type="text/css">\n'
    '    <link rel="stylesheet" href="referers.css" type="text/css">\n'
    '  </head>\n'
    '  <body>\n'
    '    <div id="main">'
)

# Closing part of every HTML file.
TEMPLATE_END = '</div></body></html>'

# Used by EXCLUDEDBYFUNC: the IP addresses you browse to the site from.
PLACES_I_VISIT = ['196.15.188.2']

# Used by EXCLUDEDBYFUNC: the agent strings of your own browsers.
MY_AGENTS = ['Mozilla/5.0 (compatible; Konqueror/3.1; FreeBSD 5.0-RELEASE)']


# Directory that receives the script's output in HTML mode.
STATSOUTPUTDIR = 'output'

# The four pattern lists below are presumably matched against request
# URLs to classify HTML pages and downloads (the matching code is not in
# this file - confirm against the caller).

# Empty by default; "/stats/" is an example of an entry one might add.
HTMLEXCLUDEPATTERNS = []

# Note that the second pattern ends with a space.
HTMLPATTERNS = ['.html', '/ ']

DOWNLOADEXCLUDEPATTERNS = ['.doc']

DOWNLOADPATTERNS = ['/files/']

# Prefixes that introduce search terms - presumably the query-string
# parameter names used by the various search engines (confirm against
# the code that consumes this tuple).
SEARCH_PREFIXES = (
    'q', 'p',
    'as_q', 'as_epq',
    'query', 'qry',
    'searchfor', 'ask',
    'kw', 'qkw',
    'general', 'MT', 'aqa',
)

# Strings that identify each operating system.
PLATFORM_WIN95 = ("Windows 95", "Win95")
PLATFORM_WIN98 = ("Windows 98", "Win98")
PLATFORM_WIN2000 = ("Windows 2000", "Win2000", "Windows NT 5.0")
PLATFORM_WINXP = ("Windows NT 5.1", "Windows XP")
PLATFORM_WINNT = ("Windows NT", "WinNT")
PLATFORM_MACOS = ("Mac",)
PLATFORM_MACOSX = ("Mac OS X",)
PLATFORM_LINUX = ("Linux",)
PLATFORM_FREEBSD = ("FreeBSD",)
PLATFORM_SOLARIS = ("SunOS",)

# One (display name, identifying strings, excluded strings) triple per
# operating system; the third item is None when nothing is excluded.
# Presumably an entry applies when one of its identifying strings is
# found and none of its excluded strings is (confirm against the code
# that consumes this list) - e.g. plain "Windows NT" must not also claim
# Windows XP or Windows 2000, and "Mac OS" must not claim Mac OS X.
PLATFORMS = [
    ('Windows 95', PLATFORM_WIN95, None),
    ('Windows 98', PLATFORM_WIN98, None),
    ('Windows 2000', PLATFORM_WIN2000, None),
    ('Windows XP', PLATFORM_WINXP, None),
    ('Windows NT', PLATFORM_WINNT, PLATFORM_WINXP + PLATFORM_WIN2000),
    ('Mac OS', PLATFORM_MACOS, PLATFORM_MACOSX),
    ('Mac OS X', PLATFORM_MACOSX, None),
    ('Linux', PLATFORM_LINUX, None),
    ('FreeBSD', PLATFORM_FREEBSD, None),
    ('Solaris', PLATFORM_SOLARIS, None),
]

# Strings that identify each browser.
BROWSER_IE6 = ("MSIE 6",)
BROWSER_IE5 = ("MSIE 5",)
BROWSER_IE4 = ("MSIE 4",)
BROWSER_IE = ("MSIE",)
BROWSER_MOZILLA = ("Mozilla/5",)
BROWSER_NS4 = ("Mozilla/4",)
BROWSER_OPERA = ("Opera",)
BROWSER_KONQUEROR = ("Konqueror",)
BROWSER_LYNX = ("Lynx",)
BROWSER_LINKS = ("Links",)
BROWSER_SAFARI = ("Safari",)
BROWSER_FIREBIRD = ("Phoenix/", "Firebird/")
BROWSER_GALEON = ("Galeon/",)
BROWSER_CAMINO = ("Camino/", "Chimera/")
BROWSER_NETSCAPE = ("Netscape/",)

# Every string that rules out the plain "Mozilla" entry below.
BROWSER_NOT_MOZILLA = (
    BROWSER_OPERA
    + BROWSER_KONQUEROR
    + BROWSER_IE
    + BROWSER_SAFARI
    + BROWSER_FIREBIRD
    + BROWSER_GALEON
    + BROWSER_CAMINO
    + BROWSER_NETSCAPE
)

# One (display name, identifying strings, excluded strings) triple per
# browser; the third item is None when nothing is excluded.  Presumably
# an entry applies when one of its identifying strings is found and none
# of its excluded strings is (confirm against the code that consumes
# this list).
BROWSERS = [
    ('IE6', BROWSER_IE6, BROWSER_OPERA + BROWSER_KONQUEROR),
    ('IE5', BROWSER_IE5, BROWSER_OPERA + BROWSER_KONQUEROR),
    ('IE4', BROWSER_IE4, BROWSER_OPERA + BROWSER_KONQUEROR),
    ('Mozilla', BROWSER_MOZILLA, BROWSER_NOT_MOZILLA),
    ('NS4', BROWSER_NS4, BROWSER_OPERA + BROWSER_KONQUEROR + BROWSER_IE),
    ('Opera', BROWSER_OPERA, None),
    ('Safari', BROWSER_SAFARI, None),
    ('Galeon', BROWSER_GALEON, None),
    ('Firebird', BROWSER_FIREBIRD, None),
    ('Camino', BROWSER_CAMINO, None),
    ('Konqueror', BROWSER_KONQUEROR, None),
    ('Lynx', BROWSER_LYNX, None),
    ('Links', BROWSER_LINKS, None),
    ('Netscape', BROWSER_NETSCAPE, None),
]


# EXCLUDEDBYFUNC makes it possible to keep certain log entries from being
# processed - for example so that your own accesses are not recorded in
# the statistics.
def EXCLUDEDBYFUNC(line):
    """Return 1 when the log entry should be excluded, otherwise 0.

    ``line`` is indexed with the keys 'ip' and 'agent'.  The entry is
    excluded only when its IP is listed in PLACES_I_VISIT and its agent
    string is listed in MY_AGENTS; the agent is not looked at when the
    IP does not match.
    """
    if line['ip'] not in PLACES_I_VISIT:
        return 0
    if line['agent'] not in MY_AGENTS:
        return 0
    return 1

# Optional charting support.  GDCHART_OPTIONS, PIEWIDTH and PIEHEIGHT are
# only defined when the gdchart module can be imported; otherwise a
# message is printed and the three names stay undefined.
try:
    import gdchart
except ImportError:
    # Parenthesized single-argument form: prints the same text as the
    # Python 2 print statement and is also valid Python 3 syntax.
    print("Charting support not included")
else:
    # Options to pass to gdchart.option
    #
    # NOTE(review): 0xff8000 and 0x80ff00 each appear twice in
    # "pie_color" - confirm whether that is intended.
    GDCHART_OPTIONS = {
        "bg_color": 0xffffff,
        "bg_transparent": 0,
        "pie_color": (
            0xff8080, 0x80ff80, 0x8080ff, 0xffff80, 0xff80ff,
            0x80ffff, 0xff0000, 0x00ff00, 0x0000ff, 0xffff00, 0xff00ff,
            0x00ffff, 0xff8000, 0x80ff00, 0x8000ff, 0xff0080, 0x80ff00,
            0xff8000, 0xffffff, 0x000000,
        ),
        "edge_color": 0x0,
        "line_color": 0x0,
        "label_line": 0,
        "label_dist": 15,
        "percent_labels": gdchart.GDCPIE_PCT_RIGHT,
        "percent_format": " %.0f%%",
        "label_font": gdchart.GDC_TINY,
        "format": gdchart.GDC_PNG,
    }

    # Pixel width of the graph
    PIEWIDTH = 500

    # Pixel height of the graph
    PIEHEIGHT = 350

# Can override the above configuration here - for example to change
# TEMPLATE_START for different locations.  Everything an optional
# localconfig module exports replaces the same-named setting above; the
# import happens before EXCLUDEDREFERERS is assembled below, so overridden
# referer lists are picked up there.
try:
    from localconfig import *
except ImportError:
    # Parenthesized single-argument form: prints the same text as the
    # Python 2 print statement and is also valid Python 3 syntax.
    print("Local configuration not found")

# Referers to exclude from referer reports: the empty referer, this
# site's own entries, referer spammers, Google's cache and the search
# engines.  Built last so that it uses the final values of those lists.
EXCLUDEDREFERERS = (
    [""]
    + MYREFERER
    + REFERERSPAMMERS
    + GOOGLECACHE
    + SEARCHENGINES
)

