X-Git-Url: https://defaria.com/gitweb/?a=blobdiff_plain;f=web%2Frobots.txt;fp=web%2Frobots.txt;h=4f83a2bb72997850dbef0eba3a394e779bbffcd8;hb=95b32ff7e5d951d813a7c1b8256a27fff8bb6e8e;hp=0000000000000000000000000000000000000000;hpb=2f529bacabd7119ae4266b7bd513c8f9a4332d20;p=clearscm.git

diff --git a/web/robots.txt b/web/robots.txt
new file mode 100644
index 0000000..4f83a2b
--- /dev/null
+++ b/web/robots.txt
@@ -0,0 +1,59 @@
+# Robots.txt: Configure which spiders can crawl this site
+
+# Why is this server crawling my site?
+User-agent: panscient_data_services.demarc.cogentco.com
+Disallow: /
+User-agent: Inktomi
+Disallow: /
+User-agent: BaiDuSpider
+Disallow: /
+User-agent: crawl
+Disallow: /
+User-agent: GigaBot
+Disallow: /
+User-agent: arks
+Disallow: /
+User-agent: EchO!
+Disallow: /
+User-agent: Viola
+Disallow: /
+User-agent: hit
+Disallow: /
+User-agent: WISENutbot
+Disallow: /
+User-agent: BBot
+Disallow: /
+User-agent: spider
+Disallow: /
+User-agent: psbot
+Disallow: /
+User-agent: SurveyBot
+Disallow: /
+
+# Allow all others not listed above
+User-agent: *
+Disallow: /Backgrounds
+Disallow: /bin
+Disallow: /binme
+Disallow: /doc
+Disallow: /Fonts
+Disallow: /gallery
+Disallow: /Icons
+Disallow: /Images
+Disallow: /INS
+Disallow: /Legal
+Disallow: /msoffice
+Disallow: /Music
+Disallow: /Olga
+Disallow: /Personal
+Disallow: /Pictures
+Disallow: /Senators
+Disallow: /Software
+Disallow: /Sounds
+Disallow: /Warsaw
+Disallow: /Wedding
+Disallow: /jinzora
+Disallow: /jinzora2
+Disallow: /blogs/Status
+Disallow: /IBM
+Disallow: /Broadcom