diff options
Diffstat (limited to 'tools/3rdparty/sitemap_gen-1.4/example_config.xml')
-rw-r--r-- | tools/3rdparty/sitemap_gen-1.4/example_config.xml | 164 |
1 files changed, 164 insertions, 0 deletions
diff --git a/tools/3rdparty/sitemap_gen-1.4/example_config.xml b/tools/3rdparty/sitemap_gen-1.4/example_config.xml new file mode 100644 index 0000000..2e37eaa --- /dev/null +++ b/tools/3rdparty/sitemap_gen-1.4/example_config.xml @@ -0,0 +1,164 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + sitemap_gen.py example configuration script + + This file specifies a set of sample input parameters for the + sitemap_gen.py client. + + You should copy this file into "config.xml" and modify it for + your server. + + + ********************************************************* --> + + +<!-- ** MODIFY ** + The "site" node describes your basic web site. + + Required attributes: + base_url - the top-level URL of the site being mapped + store_into - the webserver path to the desired output file. + This should end in '.xml' or '.xml.gz' + (the script will create this file) + + Optional attributes: + verbose - an integer from 0 (quiet) to 3 (noisy) for + how much diagnostic output the script gives + suppress_search_engine_notify="1" + - disables notifying search engines about the new map + (same as the "testing" command-line argument.) + default_encoding + - names a character encoding to use for URLs and + file paths. (Example: "UTF-8") +--> +<site + base_url="http://www.example.com/" + store_into="/var/www/docroot/sitemap.xml.gz" + verbose="1" + > + + <!-- ******************************************************** + INPUTS + + All the various nodes in this section control where the script + looks to find URLs. + + MODIFY or DELETE these entries as appropriate for your server. + ********************************************************* --> + + <!-- ** MODIFY or DELETE ** + "url" nodes specify individual URLs to include in the map. + + Required attributes: + href - the URL + + Optional attributes: + lastmod - timestamp of last modification (ISO8601 format) + changefreq - how often content at this URL is usually updated + priority - value 0.0 to 1.0 of relative importance in your site + --> + <url href="http://www.example.com/stats?q=name" /> + <url + href="http://www.example.com/stats?q=age" + lastmod="2004-11-14T01:00:00-07:00" + changefreq="yearly" + priority="0.3" + /> + + + <!-- ** MODIFY or DELETE ** + "urllist" nodes name text files with lists of URLs. + An example file "example_urllist.txt" is provided. + + Required attributes: + path - path to the file + + Optional attributes: + encoding - encoding of the file if not US-ASCII + --> + <urllist path="example_urllist.txt" encoding="UTF-8" /> + + + <!-- ** MODIFY or DELETE ** + "directory" nodes tell the script to walk the file system + and include all files and directories in the Sitemap. + + Required attributes: + path - path to begin walking from + url - URL equivalent of that path + + Optional attributes: + default_file - name of the index or default file for directory URLs + --> + <directory path="/var/www/icons" url="http://www.example.com/images/" /> + <directory + path="/var/www/docroot" + url="http://www.example.com/" + default_file="index.html" + /> + + + <!-- ** MODIFY or DELETE ** + "accesslog" nodes tell the script to scan webserver log files to + extract URLs on your site. Both Common Logfile Format (Apache's default + logfile) and Extended Logfile Format (IIS's default logfile) can be read. + + Required attributes: + path - path to the file + + Optional attributes: + encoding - encoding of the file if not US-ASCII + --> + <accesslog path="/etc/httpd/logs/access.log" encoding="UTF-8" /> + <accesslog path="/etc/httpd/logs/access.log.0" encoding="UTF-8" /> + <accesslog path="/etc/httpd/logs/access.log.1.gz" encoding="UTF-8" /> + + + <!-- ** MODIFY or DELETE ** + "sitemap" nodes tell the script to scan other Sitemap files. This can + be useful to aggregate the results of multiple runs of this script into + a single Sitemap. + + Required attributes: + path - path to the file + --> + <sitemap path="/var/www/docroot/subpath/sitemap.xml" /> + + + <!-- ******************************************************** + FILTERS + + Filters specify wild-card patterns that the script compares + against all URLs it finds. Filters can be used to exclude + certain URLs from your Sitemap, for instance if you have + hidden content that you hope the search engines don't find. + + Filters can be either type="wildcard", which means standard + path wildcards (* and ?) are used to compare against URLs, + or type="regexp", which means regular expressions are used + to compare. + + Filters are applied in the order specified in this file. + + An action="drop" filter causes exclusion of matching URLs. + An action="pass" filter causes inclusion of matching URLs, + shortcutting any other later filters that might also match. + If no filter at all matches a URL, the URL will be included. + Together you can build up fairly complex rules. + + The default action is "drop". + The default type is "wildcard". + + You can MODIFY or DELETE these entries as appropriate for + your site. However, unlike above, the example entries in + this section are not contrived and may be useful to you as + they are. + ********************************************************* --> + + <!-- Exclude URLs that end with a '~' (IE: emacs backup files) --> + <filter action="drop" type="wildcard" pattern="*~" /> + + <!-- Exclude URLs within UNIX-style hidden files or directories --> + <filter action="drop" type="regexp" pattern="/\.[^/]*" /> + +</site> |