Knowledge Base

Programmable Layouts:
Creating a Google Sitemap

Last Modified:
24 Apr 2019
User Level:
Power User

Description

Though you can create sitemaps through the TERMINALFOUR UI, sometimes you might want more control. Using a Programmable Layout will provide you with more flexibility when you need it. 

If you need a quick primer on sitemaps, look no further than this article from Google. 

Instructions

1. Create a Content Type

Create a Content Type called Sitemap in Assets > Content Types.
Add the following Plain Text elements to the Content Type ("Channel id" is required; "Exclusions" and "File Extensions" are optional):

Name | Type | Required | Max Size | Note
Channel id | Plain Text | Yes | 80 |
Exclusions | Plain Text | No | 1000 | The max size is dependent on the number of exclusions you require.
File Extensions | Plain Text | No | 80 | Only add this element if you want to include media types such as images or video in your sitemap, e.g. jpg, png, mp4.

Save Changes.

2. Create a Content Layout

Create a Content Layout called text/html and set the Content Layout Processor to JavaScript Content.
Copy & paste the code below into the layout:

Screenshot of Content Layout for Sitemap Programmable Layout


try {
  importClass(com.terminalfour.database.DatabaseUtils);
  importClass(com.terminalfour.channel.IChannelManager);
  importClass(com.terminalfour.spring.ApplicationContextProvider);
  importClass(com.terminalfour.publish.utils.BrokerUtils);
  importClass(java.lang.StringBuilder);
   
  // Reads the "Channel id" element of the current content item: the id of the
  // Channel whose published pages should be listed in the sitemap.
  // Returns whatever processT4Tags produces for that element.
  // NOTE(review): the element name in the T4 tag must match the element name
  // on the Sitemap Content Type ("Channel id") - confirm if it was renamed.
  function getContentChannelId(){
    return BrokerUtils.processT4Tags(dbStatement, publishCache, section, content, language, isPreview, '<t4 type="content" name="Channel id" output="normal" />');
  } 
  // Looks up the Channel with the given id through the IChannelManager bean
  // obtained from ApplicationContextProvider, and returns what the manager's
  // get(id) call returns.
  function getContentChannel(id){
    return ApplicationContextProvider.getBean(IChannelManager).get(id);
  }
  // Reads the optional "File Extensions" element of the current content item:
  // a comma-separated list of extra file extensions (e.g. "jpg,png") whose
  // published files should also be listed in the sitemap.
  // Returns whatever processT4Tags produces for that element.
  // NOTE(review): the element name in the T4 tag must match the element name
  // on the Sitemap Content Type ("File Extensions") - confirm if it was renamed.
  function getFileExtensions(){
    return BrokerUtils.processT4Tags(dbStatement, publishCache, section, content, language, isPreview, '<t4 type="content" name="File Extensions" output="normal" />');
  }
  // Builds the SQL that selects path and last_modified from published_file_info
  // for one channel.
  //   fileExtensions  - comma-separated extra extensions to include (may be
  //                     empty or null); blank entries are ignored
  //   channelId       - id appended, unquoted, to the channel_id condition
  //   defaultFileName - index file name; paths ending with it are always matched
  // Returns the StringBuilder holding the finished query.
  function buildPubFileInfoQuery(fileExtensions, channelId, defaultFileName){
    var query = new StringBuilder("SELECT path, last_modified FROM published_file_info WHERE channel_id = ").append(channelId),
        // String() gives a script string whether the caller passed a Java or a
        // JavaScript string, so the same split/trim logic works for both.
        extensionText = (fileExtensions === null || fileExtensions === undefined) ? '' : String(fileExtensions),
        rawExtensions = extensionText.split(","),
        pathClauses = [],
        extension,
        i;

    // Doubles single quotes so a value cannot terminate the SQL string literal.
    function quoteLiteral(value){
      return String(value).replace(/'/g, "''");
    }

    pathClauses.push("path LIKE '%" + quoteLiteral(defaultFileName) + "'");
    for (i = 0; i < rawExtensions.length; i++){
      extension = rawExtensions[i].replace(/^\s+|\s+$/g, '');
      // A blank entry would become LIKE '%' and match every published file.
      if (extension.length > 0){
        pathClauses.push("path LIKE '%" + quoteLiteral(extension) + "'");
      }
    }

    if (pathClauses.length > 1){
      query.append(" AND (").append(pathClauses.join(" OR ")).append(")");
    }else{
      query.append(" AND ").append(pathClauses[0]);
    }
    query.append(" AND approved_pending = 0");
    return query;
  }
  // Runs the given query and collects the "path" and "last_modified" columns.
  //   query - the SQL to execute (a string, or an object whose string form is
  //           the SQL, such as the StringBuilder from buildPubFileInfoQuery)
  // Returns a two-element array: [paths, lastModDates], index-aligned.
  // Both inner arrays are always present; if the query fails an error message
  // is written to the output and both arrays are returned empty, so callers
  // can index the result without checking for undefined.
  function getPubFileInfo(query){
    var paths = [],
        lastModDates = [],
        pubFileInfo = [paths, lastModDates],
        dbConnection,
        pubFileInfoStmt,
        pubFileInfoRS;
    try {
      dbConnection  = DatabaseUtils.getConnection();
      pubFileInfoStmt = dbConnection.createStatement();
      pubFileInfoRS = pubFileInfoStmt.executeQuery(String(query));
      while (pubFileInfoRS.next()){
        paths.push(pubFileInfoRS.getString("path"));
        lastModDates.push(pubFileInfoRS.getString("last_modified"));
      }
    }catch (err){
      // Discard any partially read rows so a half-built sitemap is not published.
      paths.length = 0;
      lastModDates.length = 0;
      document.write ("An error occurred getting the publish file information for the T4 sitemap");
    }finally{
      DatabaseUtils.closeQuietly(pubFileInfoRS);
      DatabaseUtils.closeQuietly(pubFileInfoStmt);
      DatabaseUtils.closeQuietly(dbConnection);
    }
    return pubFileInfo;
  }
   
  // Turns published file paths into the path part of a sitemap URL.
  // For every entry: strips the default (index) file name if present, strips
  // the channel's file output path, turns a backslash into a forward slash and
  // lower-cases the result. The array is modified in place and returned.
  // NOTE(review): the replace() calls presumably run on java.lang.String values
  // (which replace every occurrence) - confirm before switching to script strings.
  function formatPath(paths, defaultFileName, sitemapChannel){
    var outputDir = sitemapChannel.getFileOutputPath(),
        current,
        i;

    for (i = 0; i < paths.length; i++){
      current = paths[i];
      if (current.indexOf(defaultFileName) >= 0){
        current = current.replace(defaultFileName, '');
      }
      paths[i] = current.replace(outputDir, "").replace("\\", "/").toLowerCase();
    }
    return paths;
  }
   
  // Reduces each last-modified timestamp to its date part for the sitemap.
  // Removes a " HH:MM:SS" time, together with an optional fractional-seconds
  // suffix, from every entry. Null/undefined entries become '' instead of
  // raising an error. Entries are converted with String() first, so both Java
  // and JavaScript strings are accepted. The array is modified in place and
  // returned.
  function  formatLastModified(lastModDates){
    // The character before the fraction is matched loosely (any character), as
    // in the original pattern, so every value that was stripped before still is.
    var timePattern = / ([01]?[0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:.[0-9]+)?/g,
        i;

    for (i = 0; i < lastModDates.length; i++){
      if (lastModDates[i] === null || lastModDates[i] === undefined){
        lastModDates[i] = '';
      }else{
        lastModDates[i] = String(lastModDates[i]).replace(timePattern, "");
      }
    }
    return lastModDates;
  }
  // Exclusions are the pages we don't want in the sitemap.
  // Reads the comma-separated "Exclusions" element of the current content item
  // and returns its trimmed, non-blank entries as an array, or null when there
  // are none. Blank entries are dropped because an empty exclusion is contained
  // in every path and would therefore remove every page from the sitemap.
  // NOTE(review): the element name in the T4 tag must match the element name
  // on the Sitemap Content Type ("Exclusions") - confirm if it was renamed.
  function getExclusions(){
    var exclusions = BrokerUtils.processT4Tags(dbStatement, publishCache, section, content, language, isPreview, '<t4 type="content" name="Exclusions" output="normal" />'),
        exclusionArray = [],
        rawEntries,
        entry,
        i;

    if (exclusions === null || exclusions === undefined || !(exclusions.length() > 0)){
      return null;
    }
    // length()/split()/trim() are called as methods, as in the original code,
    // so the entries stay the same kind of string later steps already receive.
    rawEntries = exclusions.split(",");
    for (i = 0; i < rawEntries.length; i++){
      entry = rawEntries[i].trim();
      if (entry.length() > 0){
        exclusionArray.push(entry);
      }
    }
    return exclusionArray.length > 0 ? exclusionArray : null;
  }
  // Returns the sitemap channel's Base HREF, guaranteed to end with exactly the
  // trailing '/' it needs: one is appended only when the value does not already
  // end with one. Reads the outer sitemapChannel variable.
  // The value is converted with String() before the comparison.
  // NOTE(review): under Rhino the result of a Java substring() is presumably a
  // wrapped Java object, which would never strictly equal the '/' literal and
  // so always got a second slash appended - confirm against the runtime in use.
  function setBaseHrefValidity(){
    var rawBaseHref = sitemapChannel.getBaseHref(),
        baseHref = (rawBaseHref === null || rawBaseHref === undefined) ? '' : String(rawBaseHref);

    if (baseHref.charAt(baseHref.length - 1) !== '/'){
      baseHref += '/';
    }

    return baseHref;
  }
  /* Check if spaces are to be replaced in section names. If so, exclusions need to have their spaces replaced.
  This should cover spaces in output uri's being replaced also because if section name spaces are removed, the
  output uri ones tend to be also. Both options having different enabled status have not been accounted for.

  For every exclusion: removes the characters ' , ( and ), replaces each space with the separator, and drops a
  single leading '/'. The separator is the 'previewPublish.replaceSpacesInFilenamesWith' config option when the
  channel converts spaces in section names (falling back to ',' when the option is missing, empty or cannot be
  read), and '' otherwise. The array is modified in place and returned; null/undefined is returned unchanged.
  NOTE(review): ',' is the fallback the original code used - confirm it matches the product default. */
  function formatExclusions(sitemapChannel, exclusionArray){
    var DEFAULT_SEPARATOR = ',',
        query = "SELECT config_value FROM config_option WHERE config_key = 'previewPublish.replaceSpacesInFilenamesWith'",
        fileNameSep = '',
        configValue = null,
        configRS,
        dbConnection,
        configStmt,
        cleaned,
        i;
    
    if (sitemapChannel.isConvertSpacesInSectionNameEnabled() == true){
      try{
        dbConnection = DatabaseUtils.getConnection();
        configStmt = dbConnection.createStatement();
        configRS = configStmt.executeQuery(query);
        if (configRS.next()){
          configValue = configRS.getString('config_value');
        }
      }catch (err){
        document.write ("An error occurred getting the file part separator for the T4 sitemap");
      }finally{
        DatabaseUtils.closeQuietly(configRS);
        DatabaseUtils.closeQuietly(configStmt);
        DatabaseUtils.closeQuietly(dbConnection);
      }
      // Decided after the try block so a failed lookup also ends up with the
      // fallback instead of an undefined separator. String() makes the
      // empty-value check work for Java strings as well as script strings.
      fileNameSep = (configValue === null || configValue === undefined) ? '' : String(configValue);
      if (fileNameSep === ''){
        fileNameSep = DEFAULT_SEPARATOR;
      }
    }
    if (exclusionArray !== null && exclusionArray !== undefined){
      for(i = 0; i < exclusionArray.length; i++){ 
        // split/join replaces every space and treats the separator literally.
        cleaned = String(exclusionArray[i]).replace(/[',()]/g, "").split(" ").join(fileNameSep);
        if (cleaned.charAt(0) === '/'){
          cleaned = cleaned.substring(1);
        }
        exclusionArray[i] = cleaned;
      }
    }
    return exclusionArray;
  }
  // Skip over any excluded URL's and generate the Sitemap XML for the remaining ones.
  //   paths          - formatted paths, appended to the base href
  //   exclusionArray - exclusions to skip (null/empty for none); a path is
  //                    skipped when it contains the lower-cased exclusion
  //   lastModDates   - last-modified values, index-aligned with paths
  // Writes one <url> entry per remaining path (loc, changefreq, priority and,
  // when a value is available, lastmod). Text content is entity-escaped so
  // characters such as '&' do not produce invalid XML.
  function generateSitemap(paths, exclusionArray, lastModDates){
    var len = paths.length,
        baseHref = setBaseHrefValidity(),
        hasExclusions = exclusionArray !== null && exclusionArray !== undefined && exclusionArray.length > 0,
        skip,
        lastMod,
        path,
        i;

    // Escapes the five characters that are special in XML text.
    function escapeXml(value){
      return String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
    }

    for (path = 0; path < len; path++){
      skip = false;
      // Check if there are exclusions to skip in sitemap
      if (hasExclusions){
        for (i = 0; i < exclusionArray.length; i++){
          if (paths[path].indexOf(exclusionArray[i].toLowerCase()) >= 0){
            skip = true;
            break;
          }
        }
      }
      if (skip === false){
        lastMod = (lastModDates[path] === null || lastModDates[path] === undefined) ? '' : String(lastModDates[path]);
        document.writeln('<url>');
        document.writeln('<loc>' + escapeXml(baseHref + paths[path]) + '</loc>');
        document.writeln('<changefreq>daily</changefreq>');
        document.writeln('<priority>1</priority>');
        // An empty lastmod element is not a valid date, so it is left out.
        if (lastMod !== ''){
          document.writeln('<lastmod>' + escapeXml(lastMod) + '</lastmod>');
        }
        document.writeln('</url>');
      }
    }
  }
     
  /*--------------------------------------------------------------------------
  Main program
  Reads the target channel id from the content item, queries the files
  published for that channel, formats their paths and dates, applies the
  exclusions and writes the sitemap entries. Nothing is written when the
  channel id is not a whole number.
  --------------------------------------------------------------------------*/
  var channelId,
    contentChannel,
    query,
    pubFileInfo,
    paths = [],
    lastModDates = [],
    exclusionArray = [],
    sitemapChannel = publishCache.getChannel(),
    defaultFileName;
  channelId = getContentChannelId();
  // Only digits (optionally surrounded by whitespace) are accepted. isNaN()
  // also lets through empty, null, decimal and hex values, and the id is
  // concatenated unquoted into the SQL built by buildPubFileInfoQuery.
  if (/^\s*[0-9]+\s*$/.test(String(channelId))){
    contentChannel = getContentChannel(channelId);
    defaultFileName = contentChannel.getIndexFileName();
    query = buildPubFileInfoQuery(getFileExtensions(), channelId, defaultFileName);
    pubFileInfo = getPubFileInfo(query);
    paths = formatPath(pubFileInfo[0], defaultFileName, sitemapChannel);
    lastModDates = formatLastModified(pubFileInfo[1]);
    exclusionArray = getExclusions();
    exclusionArray = formatExclusions(sitemapChannel, exclusionArray);
    generateSitemap(paths, exclusionArray, lastModDates);
  }
}catch(err){
  document.write(err);
}

3. Create a Page Layout

Go to Assets > Page Layouts and create a Page Layout named "Sitemap" and add the Header and Footer code below:

Animated GIF of Sitemap Programmable Layout Page Layout

Header Code

<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">

Footer Code

</urlset>

4. Site Structure

Create a Section named "SiteMaps" in the root Section of the Site Structure, e.g, under the "Home" Section in the screenshot below. This is especially useful if you have multiple sites that require a sitemap each since it keeps them together in one location.

Don't create the Section in another Channel.

Create a Section in "SiteMaps" named after the website you will be creating the sitemap for, e.g. "www.mysite.com Sitemap". This is recommended so you can easily find it later:

Screenshot of the Programmable Layout Sitemap Site Structure

5. Create a Channel

Create a new Channel named after the Channel or site you are creating it for, e.g, "www.mysite.com Sitemap".
Other options should include: 

Name | Description
Type | text/html
Root Section | the Root Section you created above
Languages | the default Language of the website's Channel
Output Directory | the same Output Directory as the Channel you are outputting the sitemap for
Default filename | the filename of your sitemap once it's published out. You can set this to "sitemap-en.xml"
Base HREF | the domain of your website that the sitemap is for, e.g. "http://www.mysite.com"
Site Root | set the Site Root to a forward slash: "/"
Channel publish URL | the same setting as the Channel you are creating the sitemap for
Path Conversion | the same setting as the Channel you are creating the sitemap for
Convert spaces in | the same settings as the Channel you are creating the sitemap for
Page Layouts | select the Page Layout created earlier in Step 3
Fulltext defaults | the required fulltext fields, Type and File extensions, can be set to N/A; they are not needed but require a value

All other fields can be ignored. 

Save Changes.

6. Putting it all together

  • Go to the "SiteMaps" Section created in step 4 and assign the "Sitemap" Content Type to the Section with the Branch option. This will apply the Content Type automatically to any sitemap Channels you create within the "SiteMaps" Section.
  • Go to the Root Section of the sitemap Channel you created and create a Content Item with the Sitemap Content Type.
  • Put the id of the Channel that you are creating the sitemap for, in the "Channel id" element. You can get this from System Administration > Set up Sites & Channels > Channels.
  • In the Exclusions element, add the names of the Channel's Sections which you don't want to appear in the sitemap.  Each Section name should be separated with a comma, e.g, Section A,Section B,Section C. If the Section uses an Output URI, use that instead of the Section name.
  • If you want to add media types to the sitemap, add comma-separated file extensions (without the dot) to the "File Extensions" element, e.g., jpg,png
  • Publish the sitemap by using any type of Publish, e.g. Channel, Section, etc.
  • Check that the sitemap has published out correctly using either of the following methods:
    • Check the output directory of the Channel on the server to make sure your sitemap has appeared in the root of it; look for a file named "sitemap-en.xml".
    • If you have a staging site for your published Channel on the CMS, or you Publish to Live on your webservers, then access the sitemap by substituting the name of the site with the name of your staging site, e.g. {staging-site-url}sitemap-en.xml
  • Once you're happy with the sitemap, you can create a schedule to publish out the sitemap Channel. Any new Sections you create will then be included in the subsequent sitemap publishes. We recommend setting this to a time that is out of hours, when no users are on the system and when other scheduled publishes aren't running.
  • If you are using Transfer Manager, a Transfer Site should be set up for the sitemap Channel for the new sitemap to be transferred to the webservers