pseudocode.txt

###############################################################################
#             This document is NOT COMPLETE and NOT COMPREHENSIVE             #
###############################################################################

FUNCTION main(args)
	# Entry point. For each input file: extract the word occurrences from its
	# transcription/translation edition divs, dump their text to a plaintext
	# file under args.flat, group the occurrences into word_dict, and
	# optionally render the HTML word list.
	# NOTE(review): word_dict and languages are rebuilt for every file, so the
	# HTML output of one file replaces that of the previous one -- confirm
	# whether they should instead be accumulated across all files.
	FOR file IN args.files
		occurrences = NEW list()
		languages = NEW set()
		word_dict = NEW dictionary() [FOOTNOTE 1]

		FOR div IN file WHERE div.type == "edition" AND div.subtype IN ["transcription", "translation"]
			new_occurrences = get_words_from_element(div)
			# Open exactly one output file per div; translations get their own
			# suffix so they do not overwrite the transcription text.
			IF div.subtype == "translation"
				plaintext_file = open(args.flat + "/" + file.replace(".xml", "") + "_tranl.txt")
			ELSE
				plaintext_file = open(args.flat + "/" + file.replace(".xml", ".txt"))
			FOR occurrence IN new_occurrences
				occurrence.text >> plaintext_file
			close(plaintext_file)
			occurrences += new_occurrences

		FOR occurrence IN occurrences
			languages.add(occurrence.language)
			# Create the lemma and language entries on first sight, so the new
			# iip_word is actually stored in word_dict.
			IF NOT exists(word_dict[occurrence.lemma])
				word_dict[occurrence.lemma] = NEW dictionary()
			IF NOT exists(word_dict[occurrence.lemma][occurrence.language])
				word_dict[occurrence.lemma][occurrence.language] = NEW iip_word()
			word_dict[occurrence.lemma][occurrence.language].occurences.append(occurrence)

		print(occurrences)

		IF args.html_general
			word_list_to_html(word_dict, languages)

FUNCTION get_words_from_element(element)
	# Walks the XML under `element` and splits its text into word occurrences.
	# Returns a list of iip_occurence objects in the order the walker
	# produced them.
	walker = NEW xml_walker(element)
	occurrences = NEW list()
	new_occurrence = NEW iip_occurence()
	within = NEW list()
	FOR step IN walker
		# Keep track of the tags that currently enclose this step.
		FOR tag IN step.opening
			within.append(tag)
		FOR tag IN step.closing
			within.remove(tag)
		IF is_word_terminating(step, within)
			# Consecutive terminators must not produce empty words.
			IF new_occurrence.text != ""
				occurrences.append(new_occurrence)
				new_occurrence = NEW iip_occurence()
		ELSE
			# NOTE(review): whitespace that is_word_terminating lets through
			# (indentation, kept line breaks) is appended to the word as well
			# -- confirm whether it should be skipped instead.
			new_occurrence.text += step.character
	# Flush the final word when the element does not end on a terminator.
	IF new_occurrence.text != ""
		occurrences.append(new_occurrence)
	RETURN occurrences

FUNCTION is_word_terminating(step, within)
	# Decides whether `step` ends the word currently being built.
	# Returns true or false. `within` (the tags enclosing the step) is
	# accepted but not consulted by the rules below.

	# A self-closing <lb> ends the word unless it is marked break="false".
	IF "lb" IN [x.tag_text FOR x IN step.self_closing IF x.break != "false"]
		RETURN true
	IF step.character.is_whitespace()
		# Indentation never separates words.
		IF step.character.is_indentation()
			RETURN false
		# A newline directly after one of the listed tags is not a separator.
		IF step.character == "\n" AND step.previous_tag IN include_trailing_line_breaks
			RETURN false
		RETURN true
	# Any non-whitespace character continues the current word.
	RETURN false
	
FUNCTION word_list_to_html(word_dict, languages)
	# Renders the word list as HTML: one output directory per language, one
	# info page per (lemma, language) entry, and one index page per language.
	# word_dict maps lemma -> language -> iip_word.
	FOR language IN languages
		mkdir "docs/" + language
	FOR word IN word_dict
		FOR language IN word
			# Pass the entry for this language, not the whole lemma mapping.
			write_word_info_page(word[language])
	FOR language IN languages
		# Start from a fresh copy so that one language's index does not
		# inherit the words added for the previous language.
		language_index = copy(index_template)
		FOR word IN word_dict
			IF exists(word[language])
				language_index.add(word[language])
		write(language_index, "docs/" + language + "/index.html")

FUNCTION write_word_info_page(word)
	# TODO: body not yet specified in this document. Called from
	# word_list_to_html while it iterates over word_dict.


══════════════════════════════════════════════════════════════════════════════
[FOOTNOTE 1]
word_dict is a dictionary mapping lemmas to dictionaries mapping languages to 
iip_word objects in the following manner:


                      ┌──▶ English ──▶ iip_word | occurences = [a, b, c]
            ┌──▶ no  ─┤
word_dict ──┤         └──▶ Spanish ──▶ iip_word | occurences = [d]
            │
            └──▶ dog ────▶ English ──▶ iip_word | occurences = [e, f, g]


ex) word_dict[no][Spanish].occurences == [d]

══════════════════════════════════════════════════════════════════════════════