pselkirk · chhotii-alex · Jan 9, 2020 · Feb 6, 2020 · Feb 8, 2020 · Feb 10, 2020
diff --git a/conguide/arisia-csv.py b/conguide/arisia-csv.py
@@ -142,6 +142,8 @@ def read_bios(fn, participants):
             pubsname = participant.Participant.chname[pubsname]
         except (AttributeError, KeyError):
             pass
+        if ',' in pubsname:
+            print('Warning: pubsname with comma: %s' % pubsname)
         try:
             p = participants[pubsname]
         except KeyError:

diff --git a/conguide/bios.py b/conguide/bios.py
@@ -247,7 +247,8 @@ def markupBioEntry(self, text):
 
 def write(output, participants):
     for p in sorted(participants.values()):
-        output.f.write(output.strBioEntry(p))
+        if p.sessions: # Don't list participants who don't have any sessions.
+            output.f.write(output.strBioEntry(p))
 
 def add_args(subparsers):
     parser = subparsers.add_parser('bios', add_help=False,

diff --git a/conguide/grid.py b/conguide/grid.py
@@ -200,6 +200,14 @@ def strTextCell(self, nrow, ncol, text, room=None):
     def strGrayCell(self, ncol):
         return '<td colspan="%d" class="gray">&nbsp;</td>\n' % ncol
 
+def add_char_range(the_set, startChar, endChar):
+    the_set |= set([chr(x) for x in range(ord(startChar), ord(endChar)+1)])
+
+BoringOldEnglishAlphabet = set(' ,.?:;!@<>_/()[]#%&*')
+add_char_range(BoringOldEnglishAlphabet, 'a', 'z')
+add_char_range(BoringOldEnglishAlphabet, 'A', 'Z')
+add_char_range(BoringOldEnglishAlphabet, '0', '9')
+
 class IndesignOutput(Output):
 
     name = 'indesign'
@@ -278,8 +286,27 @@ def _readconfig(self):
             pass
         self.configSlice()
 
+    def unichar_tag(self, ch):
+        return '<0x%04x>' % (ord(ch))
+
+    def char_or_tag(self, ch):
+        if ch in BoringOldEnglishAlphabet:
+            return ch
+        else:
+            return self.unichar_tag(ch)
+
     def cleanup(self, text):
         text = Output.cleanup(self, text)
+        # According to a document downloaded from
+        # https://acdowd-designs.com/sfsu_access/indesign_cs4_taggedtext.pdf
+        # the correct way to add "special" characters to InDesign tagged text
+        # is to replace, for example, Unicode character U+2013 with the string "<0x2013>".
+        # This avoids some problems with InDesign interpreting ASCII-WIN differently on
+        # Macintosh.
+        # The document lists a small selection of special characters to be tagged
+        # this way; but some experimenting with InDesign suggests that this will
+        # work at least for various accented letters and possibly generally.
+        text = ''.join([self.char_or_tag(ch) for ch in text])
         # convert italics
         text = text.replace('<i>', '<CharStyle:Body italic>')
         text = text.replace('</i>', '<CharStyle:>')

diff --git a/conguide/schedule.py b/conguide/schedule.py
@@ -189,6 +189,24 @@ def cleanup(self, text):
         text = text.replace('\n', '<br/>\n')
         return text
 
+    '''
+    TODO: sanitize HTML.
+    We shall allow <i> tags.
+    We MIGHT allow <a> tags. If so, they will have target="_blank" added.
+    All other tags should be removed.
+    Other stray < and > characters should be replaced with the corresponding entity,
+    i.e. &lt; and &gt;.
+    Existing entities in the text should not be clobbered. Currently, if the text
+    includes &mdash;, the resulting text will include the nonsense &amp;mdash;.
+    Fix this!
+    For now, I have copied the inherited implementation to this class. Will replace it with
+    an overriding method that has the correct implementation. (Will have to fix the cleanup()
+    method above as well. Also, work on strTitle(), almost the same (but excluding <a>).)
+    '''
+    def strDescription(self, session, upper=False):
+        text = self.cleanup(session.description)
+        return text.upper() if upper else text
+
     def markupSession(self, session, text):
         return '<p><a name="%s"></a>\n%s</p>' % (session.sessionid, text)
 
@@ -202,6 +220,9 @@ def markupParticipant(self, participant, name):
                 href = None
         return '<a href="%s#%s">%s</a>' % (href, re.sub(r'\W', '', name), name) if href else name
 
+def charToEntity(ch):
+    return '&#%d;' % (ord(ch))
+
 class XmlOutput(Output):
 
     name = 'xml'
@@ -283,6 +304,19 @@ def strIcon(self, session):
     def markupIcon(self, session, text):
         return '<ss-icon>%s</ss-icon>' % text if text else ''
 
+    '''
+    TODO:
+    We will allow <i> tags, but all other HTML tags must be removed.
+    Do we want to remove the text inside <a>...</a>?
+    Do stray instances of < or > mess up InDesign's XML parsing? We can
+    replace them with &lt; and &gt; respectively. (Verified with InDesign version 14
+    that these entities are handled correctly.)
+    If any other characters might cause trouble, replace them with entity numbers
+    generated using the charToEntity utility function.
+    What's the policy on entity names in Zambia descriptions (for example, &mdash;)?
+    InDesign does not recognize these. So, ideally we would replace entity names
+    other than &lt; and &gt; with entity numbers.
+    '''
     def strDescription(self, session):
         if dedup:
             try: