AdassIndex.py

#
#                       A d a s s  I n d e x . p y
#
#  This is the code for a module that contains a number of utility routines
#  that handle operations connected with subject index entries for ADASS
#  Proceedings.
#
#  WriteSubjectIndex (IndexEntries,OutputFile)
#     Writes a list of entries in ssindex format to a file in hierarchical
#     format.
#
#  ReadIndexList (FilePath)
#     Reads a file in hierarchical format and returns a list of entries in
#     ssindex format.
#
#  'ssindex' format refers to the "topic!sub-topic!sub-topic" form used in
#  subject index entries in a .tex file, where the index string is the argument
#  to an \ssindex{} command.
#
#  'Hierarchical' format refers to the alphabetical hierarchical format used
#  in the text files usually used to record the subject entries used. This
#  looks like the way the entries will appear in the volume index, eg:
#  topic
#     sub-topic
#        sub-topic
#
#  Author(s): Keith Shortridge (keith@knaveandvarlet.com.au)
#
#  Python versions:
#      This code is compatible with both Python 2 and Python 3.
#
#  History:
#     15th Jan 2017. Original version, based on some code in the Index.py and
#                    Finish.py ADASS scripts. KS.
#     18th Aug 2017. Added comment about Python 3. KS.
#     11th Oct 2017. Corrected initial comments - duplicate words removed. KS.

import sys
import string
import os

# ------------------------------------------------------------------------------

#                     W r i t e  S u b j e c t  I n d e x
#
#  This routine is passed a list containing a set of subject entries in the
#  "topic!sub-topic!sub-topic" form. They may contain duplicates, and need
#  not be sorted. It writes these out in alphabetical hierarchical format, eg
#  topic
#     sub-topic
#        sub-topic
#  to the specified file.
#
#  Note that what is passed should be an open file, not the name of the file.
#  This allows the caller to add additional material - comments, for example -
#  before or after the entries written by this routine.

def WriteSubjectIndex (IndexEntries,OutputFile) :

   #  Writing the output file is easy enough, once we have the entries
   #  in the list sorted - which sorted() does nicely. We can then spot
   #  duplicate entries - they'll the same as the previous entry - and
   #  we can see where the various levels change. We only support three
   #  levels of entry.

   LastEntry = ""
   LastTop = ""
   LastSecond = ""
   Count = 0
   for Entry in sorted(IndexEntries) :
      if (Entry != LastEntry) :
         LastEntry = Entry
         Count = Count + 1
         Levels = Entry.split("!")
         if (Levels[0] != LastTop) :
            OutputFile.write(Levels[0] + "\r\n")
            LastTop = Levels[0]
            LastSecond = ""
         if (len(Levels) > 1) :
            if (Levels[1] != LastSecond) :
               OutputFile.write("    " + Levels[1] + "\r\n")
               LastSecond = Levels[1]
         if (len(Levels) > 2) :
            OutputFile.write("        " + Levels[2] + "\r\n")

# ------------------------------------------------------------------------------

#                     R e a d  I n d e x  L i s t
#
#  This routine is passed the name of a file containing subject index entries
#  in alphabetical hierarchical format, eg
#  topic
#     sub-topic
#        sub-topic
#  It reads these and returns a list containing these subject entries in the
#  "topic!sub-topic!sub-topic" form. Blank lines and lines beginning with
#  '#' are ignored.
#
#  Note that if the file does not exist, this routine returns an empty list
#  but does not output any error messages. If you need to know if the file
#  exists, check this before calling this routine.

def ReadIndexList (FilePath) :
   IndexList = []
   if (os.path.exists(FilePath)) :
      IndexFile = open(FilePath,mode='r')
      TopLevel = ""
      SecondLevel = ""
      for Entry in IndexFile :
         if (not Entry.startswith('#')) :
            FullEntry = ""
            if (Entry.startswith("        ")) :
               FullEntry = TopLevel + '!' + SecondLevel + '!' + \
                                    Entry.rstrip(" \r\n").lstrip()
            elif (Entry.startswith("    ")) :
               SecondLevel = Entry.rstrip(" \r\n").lstrip()
               FullEntry = TopLevel + '!' + SecondLevel
            else :
               if (Entry.strip() != "") :
                  TopLevel = Entry.rstrip(" \r\n").lstrip()
                  FullEntry = TopLevel
            if (FullEntry != "") :
               IndexList.append(FullEntry)
      IndexFile.close()
   return IndexList