summary refs log tree commit diff stats
path: root/tools/3rdparty/sitemap_gen-1.4/example_config.xml
diff options
context:
space:
mode:
Diffstat (limited to 'tools/3rdparty/sitemap_gen-1.4/example_config.xml')
-rw-r--r-- tools/3rdparty/sitemap_gen-1.4/example_config.xml 164
1 files changed, 164 insertions, 0 deletions
diff --git a/tools/3rdparty/sitemap_gen-1.4/example_config.xml b/tools/3rdparty/sitemap_gen-1.4/example_config.xml
new file mode 100644
index 0000000..2e37eaa
--- /dev/null
+++ b/tools/3rdparty/sitemap_gen-1.4/example_config.xml
@@ -0,0 +1,164 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ sitemap_gen.py example configuration file
+
+ This file specifies a set of sample input parameters for the
+ sitemap_gen.py client.
+
+ You should copy this file into "config.xml" and modify it for
+ your server.
+
+
+ ********************************************************* -->
+
+
+<!-- ** MODIFY **
+ The "site" node describes your basic web site.
+
+ Required attributes:
+ base_url - the top-level URL of the site being mapped
+ store_into - the file-system path on the web server where the
+ output file is written; it should end in '.xml' or '.xml.gz'
+ (the script will create this file)
+
+ Optional attributes:
+ verbose - an integer from 0 (quiet) to 3 (noisy) controlling
+ how much diagnostic output the script gives
+ suppress_search_engine_notify="1"
+ - disables notifying search engines about the new map
+ (same as the "testing" command-line argument)
+ default_encoding
+ - names the character encoding to use for URLs and
+ file paths (example: "UTF-8")
+-->
+<site
+ base_url="http://www.example.com/"
+ store_into="/var/www/docroot/sitemap.xml.gz"
+ verbose="1"
+ >
+
+ <!-- ********************************************************
+ INPUTS
+
+ All the various nodes in this section control where the script
+ looks to find URLs.
+
+ MODIFY or DELETE these entries as appropriate for your server.
+ ********************************************************* -->
+
+ <!-- ** MODIFY or DELETE **
+ "url" nodes specify individual URLs to include in the map.
+
+ Required attributes:
+ href - the URL
+
+ Optional attributes:
+ lastmod - timestamp of the last modification (ISO 8601 format)
+ changefreq - how often the content at this URL is usually updated
+ priority - value from 0.0 to 1.0 giving the URL's relative importance within your site
+ -->
+ <url href="http://www.example.com/stats?q=name" />
+ <url
+ href="http://www.example.com/stats?q=age"
+ lastmod="2004-11-14T01:00:00-07:00"
+ changefreq="yearly"
+ priority="0.3"
+ />
+
+
+ <!-- ** MODIFY or DELETE **
+ "urllist" nodes name text files that contain lists of URLs.
+ An example file, "example_urllist.txt", is provided.
+
+ Required attributes:
+ path - path to the text file
+
+ Optional attributes:
+ encoding - encoding of the file, if it is not US-ASCII
+ -->
+ <urllist path="example_urllist.txt" encoding="UTF-8" />
+
+
+ <!-- ** MODIFY or DELETE **
+ "directory" nodes tell the script to walk the file system
+ and include all files and directories it finds in the Sitemap.
+
+ Required attributes:
+ path - file-system path to begin walking from
+ url - URL that corresponds to that path
+
+ Optional attributes:
+ default_file - name of the index or default file for directory URLs
+ -->
+ <directory path="/var/www/icons" url="http://www.example.com/images/" />
+ <directory
+ path="/var/www/docroot"
+ url="http://www.example.com/"
+ default_file="index.html"
+ />
+
+
+ <!-- ** MODIFY or DELETE **
+ "accesslog" nodes tell the script to scan web server log files to
+ extract URLs on your site. Both Common Logfile Format (Apache's default
+ logfile) and Extended Logfile Format (IIS's default logfile) can be read.
+
+ Required attributes:
+ path - path to the log file
+
+ Optional attributes:
+ encoding - encoding of the file, if it is not US-ASCII
+ -->
+ <accesslog path="/etc/httpd/logs/access.log" encoding="UTF-8" />
+ <accesslog path="/etc/httpd/logs/access.log.0" encoding="UTF-8" />
+ <accesslog path="/etc/httpd/logs/access.log.1.gz" encoding="UTF-8" />
+
+
+ <!-- ** MODIFY or DELETE **
+ "sitemap" nodes tell the script to scan other Sitemap files. This can
+ be useful for aggregating the results of multiple runs of this script
+ into a single Sitemap.
+
+ Required attributes:
+ path - path to the existing Sitemap file
+ -->
+ <sitemap path="/var/www/docroot/subpath/sitemap.xml" />
+
+
+ <!-- ********************************************************
+ FILTERS
+
+ Filters specify patterns that the script compares
+ against all URLs it finds. Filters can be used to exclude
+ certain URLs from your Sitemap, for instance if you have
+ hidden content that you hope the search engines don't find.
+
+ Filters can be either type="wildcard", which means standard
+ path wildcards (* and ?) are used to compare against URLs,
+ or type="regexp", which means regular expressions are used
+ to compare.
+
+ Filters are applied in the order specified in this file.
+
+ An action="drop" filter causes exclusion of matching URLs.
+ An action="pass" filter causes inclusion of matching URLs,
+ short-circuiting any later filters that might also match.
+ If no filter at all matches a URL, the URL will be included.
+ Together you can build up fairly complex rules.
+
+ The default action is "drop".
+ The default type is "wildcard".
+
+ You can MODIFY or DELETE these entries as appropriate for
+ your site. However, unlike above, the example entries in
+ this section are not contrived and may be useful to you as
+ they are.
+ ********************************************************* -->
+
+ <!-- Exclude URLs that end with a '~' (e.g. Emacs backup files) -->
+ <filter action="drop" type="wildcard" pattern="*~" />
+
+ <!-- Exclude URLs within UNIX-style hidden files or directories (names starting with '.') -->
+ <filter action="drop" type="regexp" pattern="/\.[^/]*" />
+
+</site>