# robots.txt file for http://www.aeiveos.com/
# Protect various files and directories from access
#
# Format reminder: one directive per line.  A '#' starts a comment that
# runs to the end of the line, so directives must never share a line
# with a preceding comment.
# -------------------------------------------------------------
# Default for all robots
# Note: a robot that matches a named User-agent group elsewhere in this
# file obeys ONLY that group; the Disallow/Allow rules below are not
# inherited.  Any named group that should follow these rules must
# repeat them.
# 060603 RJB - enable universal indexing (for now)
# Correct for web agents behaving badly in the future
# 090513: Change Crawl-Delay from 2 minutes to 5 minutes (300)
# NOTE: the value in effect below is 3600 seconds (1 hour), not the 300
# mentioned in the 090513 entry; the later change is not logged here.
User-agent: *
Crawl-Delay: 3600
# Disallow: /
# Disallow: /AROBOTWARN.html
# Directories with files of no interest...
Disallow: /gif
Disallow: /images
Disallow: /forms
Disallow: /stats
Disallow: /Icons
# Directories which are web documentation
Disallow: /html
Disallow: /perl
# Directories which are old hosted sites
Disallow: /a4m
Disallow: /lef
Disallow: /Cryonics
# Sites which may have information
Disallow: /Aging
# Disallow: /au
Disallow: /books
Disallow: /diet
# Disallow: /gerontol
Disallow: /homes
Disallow: /ml
Disallow: /news
Disallow: /search
Disallow: /software
# Disallow: /~bradbury/Authors/Evolution/Munck-L
# Allowed...
# /Aging/Authors is a longer (more specific) match than /Aging, so it
# stays crawlable for robots that use longest-match precedence.
Allow: /Aging/Authors
# Allow: /diseases/
Allow: /~bradbury/
Allow: /~rudyb/
## #
## # -------------------------------------------------------------
## # Scooter-W3-1.0 = Altavista (trek23.sv.av.com)
## # Now allowing Altavista searches of some directories
## User-agent: Scooter
## Allow: /~bradbury/
## Allow: /Aging/Authors/
## # -------------------------------------------------------------
## # ArchitextSpider = excite
## User-agent: ArchitextSpider
## # - follow general rules -
## # -------------------------------------------------------------
## # Ask Jeeves = directhit.com
## # User-agent: Mozilla/2.0
## User-agent: Jeeves
## Disallow: /
## Allow: /~bradbury/
## Allow: /Aging/Authors/
## # -------------------------------------------------------------
## User-agent: grabber
## Disallow: /
## # No searches for directhit.com until it behaves nicely
## # -------------------------------------------------------------
## User-agent: FAST-WebCrawler
## Disallow: /
## # -------------------------------------------------------------
## # The following is how the Fast Search shows up in agents_log
## User-agent: Search
## Disallow: /
## # -------------------------------------------------------------
## # According to their web page (http://www.fastsearch.com/support/crawler.asp) one needs...
## # or is this: FAST-WebCrawler/2.2.7 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html) ???
## User-agent: fast
## Disallow: /
## # -------------------------------------------------------------
## # Google: Googlebot/2.1 (+http://www.googlebot.com/bot.html)
## # problem: it appears multiple people (other than google)
## # are declaring "gogglebot" as their search agent.
## #
## # Must have Disallow: / followed by Allow: (whatever).
## #
# 041103 RJB -- disable all googlebot searches to keep log files small
# 100117 RJB - disallow all again (future test to see if we can control rate)
# NOTE: Googlebot does not act on Crawl-Delay; the directive is kept
# only for other parsers that may read this group.  The Disallow: /
# line is what actually keeps Googlebot out.
User-agent: Googlebot
Crawl-Delay: 3600
Disallow: /
## 050212 RJB -- enable googlebot on bradbury and Authors
## Allow: /~bradbury/
## Allow: /Aging/Authors/
## 100117 RJB - Googlebot problem -- rapid fire access to:
##    /Aging/Authors and /agethry
## # -------------------------------------------------------------
## # The Msnbot uses the Crawl-Delay number to determine how much it
## # delays (in seconds) before fetching another page.
## #
## # Must have Disallow: / followed by Allow: (whatever).
## #
User-agent: msnbot
Crawl-Delay: 3600
## accesses determined by general rules
# A robot that matches its own named group does not fall back to the
# "User-agent: *" group, so the general rules are repeated here
# explicitly; without them msnbot would be unrestricted.
# Keep this list in sync with the "User-agent: *" group.
# Directories with files of no interest...
Disallow: /gif
Disallow: /images
Disallow: /forms
Disallow: /stats
Disallow: /Icons
# Directories which are web documentation
Disallow: /html
Disallow: /perl
# Directories which are old hosted sites
Disallow: /a4m
Disallow: /lef
Disallow: /Cryonics
# Sites which may have information
Disallow: /Aging
Disallow: /books
Disallow: /diet
Disallow: /homes
Disallow: /ml
Disallow: /news
Disallow: /search
Disallow: /software
# Allowed...
Allow: /Aging/Authors
Allow: /~bradbury/
Allow: /~rudyb/
## # -------------------------------------------------------------
## # The internet archiving robot...
## # http://www.archive.org/
## # http://pages.alexa.com/exec/faqsidos/help/index.html?index=92
## User-agent: ia_archiver
## # - follow general rules -
## # -------------------------------------------------------------
## User-agent: JennyBot
## Disallow: /
## # -------------------------------------------------------------
## # Problem: don't know the name of simpy bot (possibly Googlebot)
## User-agent: simpy
## Disallow: /
## # -------------------------------------------------------------
## # NaverBot -- someplace offshore
## User-agent: NaverBot
## Disallow: /
## # -------------------------------------------------------------
## # Yahoo Slurp...
## # http://help.yahoo.com/help/us/ysearch/slurp/index.html
## # http://help.yahoo.com/help/us/ysearch/slurp/slurp-02.html
## # http://help.yahoo.com/help/us/ysearch/slurp/slurp-03.html
## # Slurp (Yahoo): http://help.yahoo.com/help/us/ysearch/slurp)
## # Slurp may have been Inktomi in the past,
## # e.g. Slurp/si (slurp@inktomi.com; http://www.inktomi.com/slurp.html)
## # For now use Crawl-delay rather than Disallow (RJB 060603)
## #
## # Must have Disallow: / followed by Allow: (whatever).
## User-agent: Slurp ## # - follow general rules - ## # ------------------------------------------------------------- ## # From AT&T trivra.com/freespeech.com ## User-agent: tivraSpider ## # Disallow: / ## # ------------------------------------------------------------- ## # Voilabot: VoilaBot BETA 1.2 (http://www.voila.com/) ## # For Voila.com in France, shows up as being from *.x-echo.com ## User-agent: Voilabot ## # Disallow: / ## # ## User-agent: violabot ## # Disallow: / ## # ------------------------------------------------------------- ## # Disallow all picture searching ## User-agent: psbot ## Disallow: / ## # ------------------------------------------------------------- ## # Disallow all polish searching ## User-agent: szukacz ## Disallow: / ## # ------------------------------------------------------------- ## # Disallow all openfind searching (it isn't a 'nice' robot) ## User-agent: Openfind ## Disallow: / ## # ------------------------------------------------------------- ## # Disallow Naver ## User-agent: nabot_1.0 ## Disallow: / ## # ------------------------------------------------------------- ## User-agent: dloader ## Disallow: / ## # ------------------------------------------------------------- ## User-agent: NaverRobot ## Disallow: / ## # Allow: /~bradbury/ ## # ------------------------------------------------------------- ## # Openbot ## User-agent: Openbot ## # Disallow: / ## # ----------------------------------------- ## User-agent: Openfind ## Disallow: / ## # ----------------------------------------- ## # www.WISEnutbot.com - WISEnut agent ## User-agent: ZyBorg ## Disallow: / ## # ----------------------------------------- ## # WebReaper: http://www.webreaper.com/ ## # User-agent: Mozilla/2.0 ## User-agent: WebReaper ## Disallow: /