User-agent: Mediapartners-Google*
Disallow:
User-agent: *
Disallow: /un-dossier-specifique/
Disallow: /un-autre-dossier-specifique/
User-agent: fast
Disallow:
User-agent: *
Disallow:/
Permet au spider d'Alltheweb et de Lycos (dont l'index est également fourni par Fast) de tout aspirer, mais refuse les autres robots.
# mostly spambots/spybots/offline downloaders that ignore robots.txt
# These bots are anoying website harvesting tools, webdownloaders, and a few misc annoyances.
RewriteCond %{HTTP_USER_AGENT} ^[A-Z]+$ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(AcoiRobot|FlickBot|webcollage) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Alligator|DA.?[0-9]|DC\-Sakura|Download.?(Demon|Express|Master|Wonder)|FileHound) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} .*almaden.* [OR]
RewriteCond %{HTTP_USER_AGENT} anarchie [NC,OR]
RewriteCond %{HTTP_USER_AGENT} AsiaNetBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*attach.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ATHENS [NC,OR]
RewriteCond %{HTTP_USER_AGENT} autohttp [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*BackWeb.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Bandit.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} bew [NC,OR]
RewriteCond %{HTTP_USER_AGENT} BlackWidow [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Bot\ mailto:craftbot@yahoo.com [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.Browse\s [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Buddy.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} Bullseye [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ChinaClaw [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Collector.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Copier.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Crawler.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} Crescent [NC,OR]
RewriteCond %{HTTP_USER_AGENT} curl [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^DA \d\.\d+" [OR]
RewriteCond %{HTTP_USER_AGENT} devsoft's\ http\ component [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Deweb [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Digimarc [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Digger [NC,OR]
RewriteCond %{HTTP_USER_AGENT} digout4uagent [NC,OR]
RewriteCond %{HTTP_USER_AGENT} DIIbot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^DiscoPump.* [NC,OR]
RewriteCond %{HTTP_USER_AGENT} DISCo\ pump [NC,OR]
RewriteCond %{HTTP_USER_AGENT} dloader(NaverRobot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Download\ Demon [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^Download" [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Downloader.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} "DTS Agent" [OR]
RewriteCond %{HTTP_USER_AGENT} EasyDL/\d\.\d+ [OR]
RewriteCond %{HTTP_USER_AGENT} eCatch [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ecollector [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Educate\ Search [NC,OR]
RewriteCond %{HTTP_USER_AGENT} EirGrabber [NC,OR]
RewriteCond %{HTTP_USER_AGENT} EmailCollector [NC,OR]
RewriteCond %{HTTP_USER_AGENT} EmailSiphon [NC,OR]
RewriteCond %{HTTP_USER_AGENT} EmailWolf [NC,OR]
RewriteCond %{HTTP_USER_AGENT} EO\ Browse [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.Eval [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Express|Mister|Web).?(Web|Pix|Image).?(Pictures|Collector)? [NC,OR]
RewriteCond %{HTTP_USER_AGENT} extractor [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ExtractorPro [NC,OR]
RewriteCond %{HTTP_USER_AGENT} EyeNetIE [NC,OR]
RewriteCond %{HTTP_USER_AGENT} fastlwspider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} FEZhead [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Fetch [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Fetch\ API\ Request [OR]
RewriteCond %{HTTP_USER_AGENT} ^(Flash|Leech)Get [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Franklin\ Locator [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Fresh|Lightning|Mass|Real|Smart|Speed|Star).?Download(er)? [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Full\ Web\ Bot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Gamespy|Go!Zilla|iGetter|JetCar|Net(Ants|Pumper)|SiteSnagger|Teleport.?Pro) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Getleft [NC,OR]
RewriteCond %{HTTP_USER_AGENT} GetRight [NC,OR]
RewriteCond %{HTTP_USER_AGENT} GetURL [NC,OR]
RewriteCond %{HTTP_USER_AGENT} GetWebPage [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^GornKer [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*gotit.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} Gozilla [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Go!Zilla.* [NC,OR]
RewriteCond %{HTTP_USER_AGENT} go-ahead-got-it [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Grabber.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*GrabNet.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} Grafula [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Harvest [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*HMView.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} HTML\ Works [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*HTTrack.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ia_archiver [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Image.?(fetch|Stripper|Sucker) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} IncyWincy [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Industry\ Program [NC,OR]
RewriteCond %{HTTP_USER_AGENT} InterGET [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Internet\ Explore\ 5\.x [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^InternetNinja.* [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Internet\ Ninja [NC,OR]
RewriteCond %{HTTP_USER_AGENT} InternetSeer.com [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Irvine [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^JetCar.* [NC,OR]
RewriteCond %{HTTP_USER_AGENT} JOC\ Web\ Spider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*JOC.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} KWebGet [NC,OR]
RewriteCond %{HTTP_USER_AGENT} larbin [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Likse.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^LinkWalker [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*LWP [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Mag-Net.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Magnet.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} MCspider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Memo.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} Microsoft\ URL [NC,OR]
RewriteCond %{HTTP_USER_AGENT} MIDown\ tool [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Mirror.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} Missauga\ Locator [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Mister\ PiX [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Monster [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Mozilla.*NEWT [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Mozilla\/3\.0\.\+Indy\ Library [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Mozilla\/3.Mozilla\/2\.01 [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Mozilla\/4\.0$ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Mozzilla [NC,OR]
RewriteCond %{HTTP_USER_AGENT} MSIECrawler [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^NASA\ Search\ 1\.0$ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Navroad.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} NearSite [NC,OR]
RewriteCond %{HTTP_USER_AGENT} net.?(ants|attache|Carta|mechanic|spider|vampire|zip) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} NICErsPRO [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ninja [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Octopus [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Offline.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} OpaL [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Openfind [NC,OR]
RewriteCond %{HTTP_USER_AGENT} OpenTextSiteCrawler [NC,OR]
RewriteCond %{HTTP_USER_AGENT} PackRat [NC,OR]
RewriteCond %{HTTP_USER_AGENT} PageGrabber [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Papa\ Foto [NC,OR]
RewriteCond %{HTTP_USER_AGENT} pavuk [NC,OR]
RewriteCond %{HTTP_USER_AGENT} PICgrabber [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*pcBrowser.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} Plucker [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Pockey.* [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Production\ Bot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Program\ Shareware [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*prospector [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^psbot [OR]
RewriteCond %{HTTP_USER_AGENT} PushSite [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Reaper.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Recorder.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ReGet [NC,OR]
RewriteCond %{HTTP_USER_AGENT} RepoMonkey [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Rover [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Rsync [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Siphon.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^Scooter-W3.* [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ScoutAbout [NC,OR]
RewriteCond %{HTTP_USER_AGENT} searchterms\.it [NC,OR]
RewriteCond %{HTTP_USER_AGENT} semanticdiscovery [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Shai [NC,OR]
RewriteCond %{HTTP_USER_AGENT} sitecheck [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Snake.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} Spegla [NC,OR]
RewriteCond %{HTTP_USER_AGENT} SpiderBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Stripper.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Sucker.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*SuperBot.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} SuperHTTP [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.Surf [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Surfbot.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} SurfWalker [NC,OR]
RewriteCond %{HTTP_USER_AGENT} tAkeOut [NC,OR]
RewriteCond %{HTTP_USER_AGENT} tarspider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Teleport.* [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Templeton [NC,OR]
RewriteCond %{HTTP_USER_AGENT} UtilMind [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Vacuum.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} VoidEYE [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Web.?(Auto|Cop|dup|Fetch|Filter|Gather|Go|Leach|Mine|Mirror|Pix|QL|RACE|Sauger) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} web.?(bandit|collector|devil|downloader|hook|mole|reaper|sucker|site|snake|stripper|weasel) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Web.?(site.?(eXtractor|Quester)|Capture|Snake|ster|Strip|Stripper|Suck|vac|walk|Whacker|ZIP) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebEMailExtrac.* [OR]
RewriteCond %{HTTP_USER_AGENT} web.by.mail [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Wget.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Whacker.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Widow.*$ [OR]
RewriteCond %{HTTP_USER_AGENT} w3mir [NC,OR]
RewriteCond %{HTTP_USER_AGENT} WhosTalking [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Widow [NC,OR]
RewriteCond %{HTTP_USER_AGENT} WUMPUS [NC,OR]
RewriteCond %{HTTP_USER_AGENT} www\.pl [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Xaldon\ WebSpider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} XGET [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Yandex [NC,OR]
RewriteCond %{HTTP_USER_AGENT} zeus [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Zeus.*Webster [OR]
# rude bot
RewriteCond %{HTTP_USER_AGENT} Atomz [OR]
RewriteCond %{HTTP_USER_AGENT} FlickBot [OR]
RewriteCond %{HTTP_USER_AGENT} "efp@gmx\.net" [OR]
RewriteCond %{HTTP_USER_AGENT} imagefetch [OR]
RewriteCond %{HTTP_USER_AGENT} "LINKS ARoMATIZED" [OR]
RewriteCond %{HTTP_USER_AGENT} "mister pix" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} PersonaPilot [OR]
RewriteCond %{HTTP_USER_AGENT} Sqworm [OR]
RewriteCond %{HTTP_USER_AGENT} SurveyBot [OR]
# Web Content International
RewriteCond %{REMOTE_ADDR} ^65\.102\.12\.2(2[4-9]|3[01])$ [OR]
RewriteCond %{REMOTE_ADDR} ^65\.102\.17\.(3[2-9]|[4-6][0-9]|7[01]|8[89]|9[0-5]|10[4-9]|11[01])$ [OR]
RewriteCond %{REMOTE_ADDR} ^65\.102\.23\.1(5[2-9]|6[0-7])$ [OR]
# NameProtect peddles their online brand monitoring to unsuspecting and gullible companies
# looking for people to sue. Despite the claims on their robot information page, they do not
# respect robots.txt; in fact, they spoof their User-Agent in multiple ways to avoid detection.
# I have banned them by User-Agent and IP address.
RewriteCond %{REMOTE_ADDR} ^12\.148\.196\.(12[8-9]|1[3-9][0-9]|2[0-4][0-9]|25[0-5])$ [OR]
RewriteCond %{REMOTE_ADDR} ^12\.148\.209\.(19[2-9]|2[0-4][0-9]|25[0-5])$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^NPBot [NC,OR]
# Cyveillance is a spybot that scours the web for copyright violations and damaging information on
# behalf of clients such as the RIAA and MPAA. Their robot spoofs its User-Agent to look like Internet
# Explorer, and it completely ignores robots.txt. I have
# banned it by IP address.
RewriteCond %{REMOTE_ADDR} ^63\.148\.99\.2(2[4-9]|[34][0-9]|5[0-5])$ [OR]
RewriteCond %{REMOTE_ADDR} ^63\.226\.3[34]\. [OR]
RewriteCond %{REMOTE_ADDR} ^63\.212\.171\.161$ [OR]
RewriteCond %{REMOTE_ADDR} ^65\.118\.41\.(19[2-9]|2[01][0-9]|22[0-3])$ [OR]
# Unknown
# unknown.Level3.net
RewriteCond %{REMOTE_ADDR} ^64\.156\.198\.(6[89]|7[0-9]|80)$ [OR]
# Wordtracker
RewriteCond %{REMOTE_ADDR} ^128\.242\.197\.101$ [OR]
# Turnitin spybot
RewriteCond %{REMOTE_ADDR} ^64\.140\.49\.6([6-9])$ [OR]
RewriteCond %{HTTP_USER_AGENT} TurnitinBot [OR]
#pour simplement les interdires
RewriteCond %{HTTP_USER_AGENT} ^.*FileHound.*$RewriteRule .* - [F,L]
#ou pour les rediriger vers une certaine page sur le web :
#RewriteRule /* http://www.nospamtoday.com/index.html [L,R]
[b]#tu peut aussi ajouter ces options de sécurité :[/b]
# this ruleset is to "stop" stupid attempts to use MS IIS expolits on us
# NIMDA
RewriteCond %{REQUEST_URI} /(admin¦cmd¦httpodbc¦nsiislog¦root¦shell)\.(dll¦exe) [NC]
RewriteRule .* - [F,L]
# CODERED
RewriteCond %{REQUEST_URI} /default\.(ida¦idq)$ [NC,OR]
RewriteCond %{REQUEST_URI} /.*\.printer$ [NC]
RewriteRule .* - [F,L]
# IE's "make available offline" mode
RewriteCond %{HTTP_USER_AGENT} MSIECrawler [OR]
# You may want to enable these lines below to disallow php and perl scripts to access your site
#RewriteCond %{HTTP_USER_AGENT} ^.*PHP.*$ [OR]
#RewriteCond %{HTTP_USER_AGENT} ^.*libwww-perl [NC,OR]
# Ignorant user trying to edit my site
RewriteCond %{HTTP_USER_AGENT} FrontPage [OR]
#this one will ban everything microsoft. Use with caution.
RewriteCond %{HTTP_USER_AGENT} ^(Microsoft|MFC).(Data|URL|WebDAV|Foundation).(Access|Control|MiniRedir|Class) [NC,OR]
# MSOffice
RewriteCond %{REQUEST_URI} ^/(MSOffice|_vti) [NC,OR]
# Various
RewriteCond %{REQUEST_URI} ^/(bin/|cgi/|cgi\-local/|cgi\-bin/|sumthin) [NC,OR]
RewriteCond %{THE_REQUEST} ^GET\ http [NC,OR]
RewriteCond %{REQUEST_URI} /sensepost\.exe [NC,OR]
RewriteCond %{REQUEST_METHOD}!^(GET|HEAD|POST) [NC,OR]
# Block if useragent and referer are unknown.
# the referer string will cause some problems with mozilla so it has been diabled
RewriteCond %{HTTP_REFERER} ^-$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^-$
RewriteRule .* - [F,L]
Sur quoi te bases-tu pour dire qu'il ne rame pas plus ? Tu as des stats détaillées de ta charge CPU ?Perso, j'en utilise un peut moins de la moitié, et mon serveur mutualisé ne rame pas plus (et je fais de l'url rewriting en plus)
Furtif a dit:Verifier la validité d'un fichier robots.txt :
http://www.searchengineworld.com/cgi-bin/robotcheck.cgi
# You may want to enable these lines below to disallow php and perl scripts to access your site
#RewriteCond %{HTTP_USER_AGENT} ^.*PHP.*$ [OR]
#RewriteCond %{HTTP_USER_AGENT} ^.*libwww-perl [NC,OR]
Ca me parait bizzard tout ça...Furif a dit:Et sur le site tout ce qu'il faut savoir sur les robots.txt
User-agent: *
Disallow: /
easyzik a dit:heuuuu... petit problème là.... Les robots ne passent plus depuis une semaine...
je fais quoi là ??