Knowledge Base

Programmable Layouts:
Creating a Google Sitemap

Last Modified:
11 Nov 2024
User Level:
Power User

Description

Though you can create sitemaps through the Terminalfour UI, sometimes you might want more control. Using a Programmable Layout will provide you with more flexibility when you need it. 

If you need a quick primer on sitemaps, look no further than this article from Google. 

Instructions

1. Create a Content Type

Create a Content Type called Sitemap in Assets > Content Types:

Name Type Required Max Size Note
Channel IDs Plain Text Yes 80 IDs of the channel and/or its microsites
Base HREF Plain Text Yes 80 Domain of your site
Exclusions Plain Text No 1000 The max size is dependent on the number of exclusions you require.
Include file extensions Plain Text No 80 Only add if you want to include media types like images or video to your sitemap e.g. jpg, png, mp4 etc. 

Save Changes.

2. Create a Content Layout

text/html

Create a Content Layout called text/html (or the default layout that you use on your channel) and set the Content Layout Processor to JavaScript Content.
Copy & paste the code below into the layout:

Screenshot of Content Layout for Sitemap Programmable Layout

try {
  importClass(com.terminalfour.database.DatabaseUtils);
  importClass(com.terminalfour.channel.IChannelManager);
  importClass(com.terminalfour.spring.ApplicationContextProvider);
  importClass(com.terminalfour.publish.utils.BrokerUtils);
  importClass(java.lang.StringBuilder);
    
  // Reads the "Channel IDs" element of the current content item and returns
  // its comma-separated entries, each with surrounding whitespace removed.
  // These are the channel and/or microsite ids the sitemap is built from.
  function getContentChannelIds(){
    var channelIdsTag = '<t4 type="content" name="Channel IDs" output="normal" modifiers="striptags,htmlentities" />';
    var renderedValue = BrokerUtils.processT4Tags(dbStatement, publishCache, section, content, language, isPreview, channelIdsTag);
    var entries = renderedValue.split(",");

    // Trim in place so the value returned is the same object split() produced.
    for (var index = 0, count = entries.length; index < count; index += 1){
      entries[index] = entries[index].trim();
    }
    return entries;
  }
   
  // Reads the "Include file extensions" element of the current content item
  // and returns its comma-separated entries with surrounding whitespace
  // removed. An empty element yields a single empty entry.
  function getFileExtensions(){
    var extensionsTag = '<t4 type="content" name="Include file extensions" output="normal" modifiers="striptags,htmlentities" />';
    var renderedValue = BrokerUtils.processT4Tags(dbStatement, publishCache, section, content, language, isPreview, extensionsTag);
    var entries = renderedValue.split(",");

    // Trim in place so the value returned is the same object split() produced.
    for (var index = 0, count = entries.length; index < count; index += 1){
      entries[index] = entries[index].trim();
    }
    return entries;
  }
 
  // Builds the query that retrieves the path and last_modified columns from
  // published_file_info for the given channels.
  //
  // fileExtensions  - extra file extensions to include next to the index file;
  //                   blank entries are ignored (a blank entry would otherwise
  //                   become LIKE '%' and match every published file).
  // channelIds      - channel / microsite ids; every entry must be digits only.
  // defaultFileName - index file name of the channel.
  //
  // Returns a java.lang.StringBuilder holding the SQL text.
  // Throws an Error when no channel id is supplied or an id is not numeric,
  // because the ids are placed into the SQL text unquoted.
  function buildPubFileInfoQuery(fileExtensions, channelIds, defaultFileName){
    var ids = [],
        pathClauses = [],
        value,
        query,
        i;

    // Removes leading/trailing whitespace without relying on String.prototype.trim.
    function strip(text){
      return String(text).replace(/^\s+|\s+$/g, "");
    }

    // Produces a LIKE condition; single quotes are doubled so the value
    // cannot end the SQL string literal early.
    function pathEndsWith(text){
      return "path LIKE '%" + String(text).replace(/'/g, "''") + "'";
    }

    for (i = 0; i < channelIds.length; i++){
      value = strip(channelIds[i]);
      if (!/^[0-9]+$/.test(value)){
        throw new Error("Invalid channel id for the T4 sitemap: " + value);
      }
      ids.push(value);
    }
    if (ids.length === 0){
      throw new Error("No channel ids were supplied for the T4 sitemap");
    }

    // The index file is always included; extensions are optional additions.
    pathClauses.push(pathEndsWith(defaultFileName));
    for (i = 0; i < fileExtensions.length; i++){
      value = strip(fileExtensions[i]);
      if (value !== ''){
        pathClauses.push(pathEndsWith(value));
      }
    }

    query = new StringBuilder("SELECT path, last_modified FROM published_file_info WHERE channel_id");
    if (ids.length > 1){
      query.append(" IN (").append(ids.join(", ")).append(")");
    }else{
      query.append(" = ").append(ids[0]);
    }

    // Parentheses are only needed when several path conditions are OR-ed.
    if (pathClauses.length === 1){
      query.append(" AND ").append(pathClauses[0]);
    }else{
      query.append(" AND (").append(pathClauses.join(" OR ")).append(")");
    }
    query.append(" AND approved_pending = 0");
    return query;
  }
 
  // Runs the given query and collects the "path" and "last_modified" column
  // of every row.
  //
  // Returns [paths, lastModifiedValues] when the query completes. If anything
  // throws while querying, a message is written to the output and an empty
  // array is returned instead. The result set, statement and connection are
  // always handed to DatabaseUtils.closeQuietly, in that order.
  function getPubFileInfo(query){
    var connection,
        statement,
        resultSet,
        result = [],
        filePaths = [],
        modifiedValues = [];

    try {
      connection = DatabaseUtils.getConnection();
      statement = connection.createStatement();
      resultSet = statement.executeQuery(query);
      // One counter is enough: both arrays grow by one entry per row.
      for (var row = 0; resultSet.next(); row++){
        filePaths[row] = resultSet.getString("path");
        modifiedValues[row] = resultSet.getString("last_modified");
      }
      result[0] = filePaths;
      result[1] = modifiedValues;
    }catch (err){
      document.write ("An error occurred getting the publish file information for the T4 sitemap");
    }finally{
      DatabaseUtils.closeQuietly(resultSet);
      DatabaseUtils.closeQuietly(statement);
      DatabaseUtils.closeQuietly(connection);
    }
    return result;
  }
    
  // Turns published file paths into site-relative, lower-case URL paths.
  //
  // paths           - array of published file paths; updated in place and returned.
  // defaultFileName - index file name of the channel; a path ending in
  //                   "/<defaultFileName>" is shortened to end in "/".
  // channel         - object whose getFileOutputPath() gives the publish directory.
  //
  // Every value is converted to a JavaScript string first, so the same code
  // works whether the inputs are script strings or Java strings.
  // Comparisons are case-insensitive: paths, publish directory and the index
  // file name are all lower-cased (previously the index file name was not,
  // so a mixed-case name such as "Index.html" never matched).
  // The publish directory is only removed from the start of a path and the
  // index file name only from its end, so identical text elsewhere in the
  // path is left alone.
  function formatPath(paths, defaultFileName, channel){
    var pubDir = String(channel.getFileOutputPath()).toLowerCase().replace(/^\s+|\s+$/g, "").replace(/\\/g, "/"),
        indexSuffix = "/" + String(defaultFileName).toLowerCase(),
        current,
        suffixStart,
        i;

    // Drop a single trailing slash so the remainder of each path keeps its leading "/".
    if (pubDir.charAt(pubDir.length - 1) === '/'){
      pubDir = pubDir.substring(0, pubDir.length - 1);
    }

    for (i = 0; i < paths.length; i++){
      current = String(paths[i]).replace(/\\/g, "/").toLowerCase();
      if (pubDir !== '' && current.indexOf(pubDir) === 0){
        current = current.substring(pubDir.length);
      }
      suffixStart = current.length - indexSuffix.length;
      if (suffixStart >= 0 && current.indexOf(indexSuffix, suffixStart) === suffixStart){
        current = current.substring(0, suffixStart) + '/';
      }
      paths[i] = current;
    }
    return paths;
  }
    
  // Strips the time-of-day portion (" HH:MM:SS" plus optional fractional
  // seconds) from each last-modified value, leaving only the date part.
  //
  // lastModDates - array of timestamp strings; updated in place and returned.
  //                Entries that are null or undefined are left untouched.
  //
  // The pattern is a regular-expression literal. Written as a quoted string,
  // "\." loses its backslash and matches any character, and a single [0-9]
  // left the remaining fractional digits attached to the date
  // (e.g. "2024-11-11 10:20:30.123" became "2024-11-1123").
  function formatLastModified(lastModDates){
    var timeOfDay = / ([01]?[0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](\.[0-9]+)?/g,
        i;

    for (i = 0; i < lastModDates.length; i++){
      if (lastModDates[i] === null || lastModDates[i] === undefined){
        continue;
      }
      // String() yields a script string even when the entry is a Java string.
      lastModDates[i] = String(lastModDates[i]).replace(timeOfDay, "");
    }
    return lastModDates;
  }
 
  // Reads the "Exclusions" element of the current content item: the pages
  // that should be left out of the sitemap.
  //
  // Returns the comma-separated entries with surrounding whitespace removed,
  // or null when the element is empty.
  function getExclusions(){
    var exclusionsTag = '<t4 type="content" name="Exclusions" output="normal" modifiers="striptags,htmlentities" />';
    var renderedValue = BrokerUtils.processT4Tags(dbStatement, publishCache, section, content, language, isPreview, exclusionsTag);
    var entries = null;

    if (renderedValue.length() > 0){
      entries = renderedValue.split(",");
      // Trim in place so the value returned is the same object split() produced.
      for (var index = 0, count = entries.length; index < count; index += 1){
        entries[index] = entries[index].trim();
      }
    }
    return entries;
  }
 
  // Reads the "Base HREF" element of the current content item, trims it and
  // returns it without a trailing "/" (at most one is removed).
  function getBaseHref(){
    var baseHrefTag = '<t4 type="content" name="Base HREF" output="normal" modifiers="striptags,htmlentities" />';
    var href = BrokerUtils.processT4Tags(dbStatement, publishCache, section, content, language, isPreview, baseHrefTag).trim();
    var lastIndex = href.length() - 1;

    return (href.substring(lastIndex) == '/') ? href.substring(0, lastIndex) : href;
  }
 
  /* Normalises the exclusion entries so they can be matched against sitemap paths.

  If the channel converts spaces in section names, the replacement text is read
  from the 'previewPublish.replaceSpacesInFilenamesWith' config option and used
  for the spaces in each exclusion; otherwise spaces are removed. This should
  cover spaces in output uri's being replaced also because if section name
  spaces are removed, the output uri ones tend to be also. Both options having
  different enabled status have not been accounted for.

  Each entry then has the characters ' , ( ) removed and is wrapped in a leading
  and trailing '/'. Entries that are empty after clean-up are dropped: they
  previously caused an out-of-range substring call that aborted the sitemap.

  channel        - object providing isConvertSpacesInSectionNameEnabled().
  exclusionArray - array of exclusion entries, or null.

  Returns a new array of script strings, or null when exclusionArray is null.
  The array passed in is not modified. */
  function formatExclusions(channel, exclusionArray){
    var query,
        fileNameSep,
        configRS,
        dbConnection,
        configStmt,
        formatted,
        name,
        i;

    if (channel.isConvertSpacesInSectionNameEnabled() == true){
      try{
        dbConnection = DatabaseUtils.getConnection();
        configStmt = dbConnection.createStatement();
        query = "SELECT config_value FROM config_option WHERE config_key = 'previewPublish.replaceSpacesInFilenamesWith'";
        configRS = configStmt.executeQuery(query);
        if (configRS.next()){
          fileNameSep = configRS.getString('config_value');
        }
      }catch (err){
        document.write ("An error occurred getting the file part separator for the T4 sitemap");
      }finally{
        DatabaseUtils.closeQuietly(configRS);
        DatabaseUtils.closeQuietly(configStmt);
        DatabaseUtils.closeQuietly(dbConnection);
      }
      // Checked after the lookup so a failed query also gets the fallback
      // instead of leaving the separator undefined.
      // NOTE(review): ',' is the fallback value this layout has always used;
      // confirm it is the intended default for the installation.
      if (fileNameSep === null || fileNameSep === '' || fileNameSep === undefined){
        fileNameSep = ',';
      }
    }else{
      fileNameSep = '';
    }

    if (exclusionArray === null){
      return exclusionArray;
    }

    // Work on script strings so the same code handles Java and script values.
    fileNameSep = String(fileNameSep);
    formatted = [];
    for (i = 0; i < exclusionArray.length; i++){
      // split/join replaces every space and treats the separator literally.
      name = String(exclusionArray[i]).replace(/[',()]/g, "").split(" ").join(fileNameSep);
      if (name === ''){
        continue;
      }
      if (name.charAt(0) !== '/'){
        name = '/' + name;
      }
      if (name.charAt(name.length - 1) !== '/'){
        name = name + '/';
      }
      formatted.push(name);
    }
    return formatted;
  }
   
  // Writes the sitemap XML: one <url> entry for every path that does not
  // contain any of the exclusions.
  //
  // paths          - URL paths, already relative to the site root and lower-cased.
  // exclusionArray - exclusion fragments, or null when there are none; compared
  //                  lower-cased against each path with a "contains" match.
  // lastModDates   - last-modified values, index-aligned with paths.
  //
  // The path part of each <loc> is XML-escaped so characters such as '&'
  // cannot produce an invalid document. The base href is written as returned
  // by getBaseHref(), which renders it with the htmlentities modifier.
  function generateSitemap(paths, exclusionArray, lastModDates){
    var baseHref = getBaseHref(),
        exclusions = [],
        currentPath,
        skip,
        i,
        j;

    // Replaces the five characters XML reserves with their entities.
    function escapeXml(text){
      return String(text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
    }

    // Lower-case the exclusions once instead of once per path.
    if (exclusionArray !== null && exclusionArray !== undefined){
      for (j = 0; j < exclusionArray.length; j++){
        exclusions.push(String(exclusionArray[j]).toLowerCase());
      }
    }

    document.writeln('<?xml version="1.0" encoding="UTF-8"?>');
    document.writeln('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">');
    for (i = 0; i < paths.length; i++){
      currentPath = String(paths[i]);
      skip = false;
      // Stop at the first exclusion that matches.
      for (j = 0; j < exclusions.length && !skip; j++){
        skip = currentPath.indexOf(exclusions[j]) >= 0;
      }
      if (!skip){
        document.writeln('<url>');
        document.writeln('<loc>' + baseHref + escapeXml(currentPath) + '</loc>');
        document.writeln('<changefreq>daily</changefreq>');
        document.writeln('<priority>1</priority>');
        document.writeln('<lastmod>' + lastModDates[i] + '</lastmod>');
        document.writeln('</url>');
      }
    }
    document.writeln('</urlset>');
  }
      
  /*--------------------------------------------------------------------------
  Main program: gather the settings from the content item, query the published
  files, normalise paths, dates and exclusions, then write the sitemap.
  --------------------------------------------------------------------------*/
  var contentChannel; // declared by the original layout; never assigned or read here

  // Channel being published and the ids the sitemap should cover.
  var channel = publishCache.getChannel();
  var channelId = getContentChannelIds();
  var defaultFileName = channel.getIndexFileName();

  // Published files: element 0 holds the paths, element 1 the last-modified values.
  var query = buildPubFileInfoQuery(getFileExtensions(), channelId, defaultFileName);
  var pubFileInfo = getPubFileInfo(query);
  var paths = formatPath(pubFileInfo[0], defaultFileName, channel);
  var lastModDates = formatLastModified(pubFileInfo[1]);

  // Exclusions are read first, then normalised for matching.
  var exclusionArray = getExclusions();
  exclusionArray = formatExclusions(channel, exclusionArray);

  generateSitemap(paths, exclusionArray, lastModDates);
}catch(err){
  document.write(err);
}

3. Create a Page Layout

Go to Assets > Page Layouts and create a Page Layout named "Blank" and Save (leave the Header and Footer sections empty).

4. Site Structure

Create a Section directly under the channel / site root named "Google Sitemap" with the following settings:

Name Google Sitemap
Show in navigation Disabled
Archive section Enabled
This is important to ensure that it does not impact on publish performance.
Output URI Blank
Output file name sitemap-en.xml
Note: If the Output file name option does not appear, this can be configured in Hierarchy configuration.
Content Types Enable the Sitemap Content Type from Step 1 for this section.
Page Layout Set the Page Layout to be the Blank layout created in step 3.

5. Create Content

Create content in the new Sitemap section with the following values:

Name Google Sitemap
Channel IDs Add the IDs of the channels or microsites that you want to use, as a comma-separated list. You can get these from System Administration > Set up Sites & Channels > Channels.
Base HREF The base URL of your site, including the protocol, e.g. https://www.mysite.com
Include file extensions By default, only your index files are used in the sitemap. To include fulltext pages and any media, put their file extensions here as a comma separated list, e.g. jpg, png, php
Exclusions These are the sections you want to exclude from the sitemap. It's a comma separated list, e.g. Section A,Section B,Section C. If a section has an output uri, you must put that in the content rather than the section name. If the output URI is a path (e.g. contains a / character), then that value should be used. e.g. /section-a/section-1/,/section-b/

Save Changes.

6. Putting it all together

  • Publish the sitemap by using any type of Publish, e.g. Channel, Section, etc.
  • Check that the sitemap has published out correctly using either of the following methods:
    • Check the output directory of the Channel on the server to make sure your sitemap has appeared in the root of it; look for a file named "sitemap-en.xml".
    • If you have a staging site for your published Channel on the Terminalfour application server, or you Publish to Live on your webservers, then access the sitemap by substituting the name of the site with the name of your staging site, e.g. {staging-site-url}sitemap-en.xml
  • Once you're happy with the sitemap, you can create a schedule to publish out your channel, selecting the option to Publish archive sections. Any new Sections you create will then be included in the subsequent sitemap publishes. We recommend setting this to a time that is out of hours, when no users are on the system and when other scheduled publishes aren't running. You may also consider creating a microsite for the Google Sitemap section and scheduling the publish of the microsite, if that suits your publishing model better.