# robots.txt for http://www.microbiology.wustl.edu/
#
# One directive per line; everything after a '#' is a comment.
# Paths that contain spaces are percent-encoded (%20) so that parsers which
# split on whitespace do not truncate the rule.
# Section headings below use '#' lines rather than blank lines because some
# older parsers treat a blank line as the end of the record.

# allow access by all robots
User-agent: *
#
# files and directories with no or transient content
Disallow: /cDNA_protein_coding.html    # sequence data, pointless to index
Disallow: /images/                     # images only, no text to index
Disallow: /forms/                      # form pages only, no content
Disallow: /templates/                  # page templates only, no content
Disallow: /job/                        # job pages, transient content
Disallow: /styles/                     # css pages, silly to index
Disallow: /upload/                     # temp storage for uploaded files
#
# directories whose contents have no external interest
Disallow: /docEdit/                    # micro's instructions on web page updates
Disallow: /seminar%20scheduling/       # temporary, working document
Disallow: /logs/                       # log pages, no external interest
Disallow: /server%20logs/              # stat pages, no external interest
Disallow: /stats/                      # stat pages, no external interest
Disallow: /CGIs/                       # not for external consumption
Disallow: /Plug-Ins/                   # not for external consumption
Disallow: /WebSTAR_Directory_Cache/    # not for external consumption
Disallow: /bt/                         # not for external consumption
Disallow: /dbs/                        # private pages
Disallow: /sysman/                     # no external interest
Disallow: /meetingmaker/               # no external interest
#
# directories that litter the site
Disallow: /stuff/                      # sundry junk
Disallow: /old%20root%20level%20files/ # sundry junk
Disallow: /tools%20&%20examples/       # sundry junk
Disallow: /Documentation/              # sundry junk