Skip to content

Commit

Permalink
Merge pull request #3547 from gregchapman-dev/gregc/fullHumdrumMetada…
Browse files Browse the repository at this point in the history
…taImport

Import a lot more Humdrum metadata
  • Loading branch information
craigsapp authored Nov 21, 2023
2 parents bec87eb + 0c02b66 commit 58675e7
Show file tree
Hide file tree
Showing 2 changed files with 2,237 additions and 460 deletions.
178 changes: 170 additions & 8 deletions include/vrv/iohumdrum.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,42 @@ class HumdrumSignifiers {

#endif /* NO_HUMDRUM_SUPPORT */

/**
 * One Humdrum reference record, split into its component parts.
 *
 * The running example used in the member notes below is the line
 * "!!!OTL2@FR:Le deuxième titre Français".
 */
struct HumdrumReferenceItem {
    /** Complete text of the HumdrumLine that holds this item (the whole example line). */
    std::string lineText;

    /**
     * Interpreted key with index, translation marker and language removed (example: "OTL").
     * When the key cannot be parsed this is everything between "!!!" and ":".
     */
    std::string key;

    /** Everything after the ':' (example: "Le deuxième titre Français"). */
    std::string value;

    /** True when key, index, isTranslated and language could be parsed out (example: true). */
    bool isParseable{ false };

    /** True when isParseable holds and key is in the known list of Humdrum keys (example: true). */
    bool isHumdrumKey{ false };

    /** True when a single '@' (not '@@') is present (example: true). */
    bool isTranslated{ false };

    /** Lowercased language, if one is present (example: "fr"). */
    std::string language;

    /**
     * Index parsed from the key (example: 2); 0 when the key carries no index.
     * The member itself starts out as -1 until it is filled in.
     */
    int index{ -1 };
};

/**
 * A date split into its components, each paired with an error qualifier.
 *
 * Every *Error string is one of "", "approximate" or "uncertain".
 * When `valid` is false the date could not be parsed and all other members
 * must be ignored.
 *
 * All numeric members are zero-initialized so that a default-constructed
 * (i.e. invalid) instance never exposes indeterminate values when it is
 * copied, compared or logged.
 */
struct DateWithErrors {
    bool valid = false; // if false, ignore everything here, the date was not parseable.
    std::string dateError; // error of the entire date ("", "approximate", "uncertain")
    int year = 0;
    std::string yearError; // error of the year ("", "approximate", "uncertain")
    int month = 0;
    std::string monthError; // error of the month ("", "approximate", "uncertain")
    int day = 0;
    std::string dayError; // error of the day ("", "approximate", "uncertain")
    int hour = 0;
    std::string hourError; // error of the hour ("", "approximate", "uncertain")
    int minute = 0;
    std::string minuteError; // error of the minute ("", "approximate", "uncertain")
    int second = 0;
    std::string secondError; // error of the second ("", "approximate", "uncertain")
};

//----------------------------------------------------------------------------
// HumdrumInput
//----------------------------------------------------------------------------
Expand Down Expand Up @@ -849,13 +885,32 @@ class HumdrumInput : public vrv::Input {

// header related functions: ///////////////////////////////////////////
// Declarations only: the bodies are not part of this header excerpt, so the
// notes below describe the signatures rather than the behavior.

// Takes no arguments and returns nothing.
// NOTE(review): presumably the entry point that drives the helpers below — confirm in the implementation.
void createHeader();

// Helpers working from a list of hum::HumdrumLine pointers ("references").
// titleStmt / work are pugi nodes passed by non-const reference; respPeople is a
// vector of string vectors passed by non-const reference.
// NOTE(review): whether respPeople is filled, read, or both by each helper is not visible here.
void insertTitle(pugi::xml_node &titleStmt, const std::vector<hum::HumdrumLine *> &references);
void insertExtMeta(std::vector<hum::HumdrumLine *> &references);
void addPerson(std::vector<std::vector<std::string>> &respPeople, std::vector<hum::HumdrumLine *> &references,
const std::string &key, const std::string &role);
void getRespPeople(std::vector<std::vector<std::string>> &respPeople, std::vector<hum::HumdrumLine *> &references);
void insertRespStmt(pugi::xml_node &titleStmt, std::vector<std::vector<std::string>> &respPeople);
void insertPeople(pugi::xml_node &work, std::vector<std::vector<std::string>> &respPeople);

// Helpers taking the pugi node named meiHead by value.
// NOTE(review): presumably each appends one child section to that node — confirm.
void createFileDesc(pugi::xml_node meiHead);

// One helper per source kind; each receives the pugi node named sourceDesc by value.
void createDigitalSource(pugi::xml_node sourceDesc);
void createPrintedSource(pugi::xml_node sourceDesc);
void createRecordedSource(pugi::xml_node sourceDesc);
void createUnpublishedSource(pugi::xml_node sourceDesc);
void createEncodingDesc(pugi::xml_node meiHead);
void createWorkList(pugi::xml_node meiHead);
void createHumdrumVerbatimExtMeta(pugi::xml_node meiHead);

// No parameters and no return value.
// NOTE(review): presumably these populate member state rather than a caller-supplied node — confirm.
void createSimpleTitleElement();
void createSimpleComposerElements();

// Same naming as the two helpers above, but operating on a caller-supplied pugi node.
void createTitleElements(pugi::xml_node element);
void createComposerElements(pugi::xml_node element);
void fillInIsoDate(pugi::xml_node element, string dateString);
std::map<std::string, std::string> isoDateAttributesFromHumdrumDate(string inHumdrumDate, bool edtf=false);
DateWithErrors dateWithErrorsFromHumdrumDate(string dateString);
std::string isoDateFromDateWithErrors(DateWithErrors date, bool edtf);
bool sanityCheckDate(
int year, int month, int day,
int hour, int minute, int second);
std::string stripDateError(string &value);
std::string getTextListLanguage(std::vector<HumdrumReferenceItem> textItems);
std::map<std::string, std::vector<HumdrumReferenceItem>> getAllReferenceItems(hum::HumdrumFile infile);
std::vector<HumdrumReferenceItem> getReferenceItems(const std::string &key);
bool anyReferenceItemsExist(std::vector<string> keys);
int getBestItem(std::vector<HumdrumReferenceItem> items, string requiredLanguage);
bool isStandardHumdrumKey(string key);

/// Templates ///////////////////////////////////////////////////////////
// Generic over the element type: rest is taken by value as ELEMENT, token by const reference.
template <class ELEMENT> void verticalRest(ELEMENT rest, const std::string &token);
Expand Down Expand Up @@ -929,7 +984,6 @@ class HumdrumInput : public vrv::Input {
// Static helpers (declarations only; bodies are not part of this excerpt).

// Integer in, integer out.
static int nextLowerPowerOfTwo(int x);
// Counterpart of the above operating on hum::HumNum instead of int.
static hum::HumNum nextHigherPowerOfTwo(hum::HumNum x);
// Takes no input and returns a string.
static std::string getDateString();
// Looks up key using the supplied list of HumdrumLine pointers and returns a string.
// NOTE(review): the result when key is absent is not visible here — confirm.
static std::string getReferenceValue(const std::string &key, std::vector<hum::HumdrumLine *> &references);
// Two overloads with parallel signatures, one for std::string and one for std::u32string.
// str is a non-const reference, so it can be modified in place.
// NOTE(review): the bool presumably reports whether a replacement happened — confirm.
static bool replace(std::string &str, const std::string &oldStr, const std::string &newStr);
static bool replace(std::u32string &str, const std::u32string &oldStr, const std::u32string &newStr);

Expand Down Expand Up @@ -1212,6 +1266,114 @@ class HumdrumInput : public vrv::Input {
// (excluding augmentation dots).
std::string m_textSmuflSpacer = "\xc2\xa0"; // bytes C2 A0 = UTF-8 encoding of U+00A0 (NO-BREAK SPACE)

// Some metadata elements that are computed once and used multiple times

// Pointers to Humdrum lines.
// NOTE(review): presumably the reference-record lines of the input file, not owned by this vector — confirm.
std::vector<hum::HumdrumLine *> m_humdrumLineReferences;
// Lists of reference items keyed by string.
// NOTE(review): presumably keyed by the interpreted reference key (e.g. "OTL") — confirm.
std::map<std::string, std::vector<HumdrumReferenceItem>> m_references;
// A pugi document plus a node handle.
// NOTE(review): m_simpleTitle presumably points into m_simpleTitleDoc — confirm.
pugi::xml_document m_simpleTitleDoc;
pugi::xml_node m_simpleTitle;
pugi::xml_document m_simpleComposersDoc;
// A pugi document plus a node handle.
// NOTE(review): m_madsCollection presumably points into m_madsDoc — confirm.
pugi::xml_document m_madsDoc;
pugi::xml_node m_madsCollection;

// The reference-record keys that count as standard Humdrum keys.
// Each entry is annotated with the meaning of that key. The order of the
// entries is preserved exactly; only the annotations from "MGN" through "GCO"
// were realigned, because they had slipped down by one entry.
std::vector<std::string> m_standardHumdrumKeys = {
    "COM", // composer's name
    "COA", // attributed composer
    "COS", // suspected composer
    "COL", // composer's abbreviated, alias, or stage name
    "COC", // composer's corporate name
    "CDT", // composer's birth and death dates (**zeit format)
    "CBL", // composer's birth location
    "CDL", // composer's death location
    "CNT", // composer's nationality
    "LYR", // lyricist's name
    "LIB", // librettist's name
    "LAR", // music arranger's name
    "LOR", // orchestrator's name
    "TXO", // original language of vocal/choral text
    "TXL", // language of the encoded vocal/choral text
    "TRN", // translator of the text
    "RTL", // album title
    "RMM", // manufacturer or sponsoring company
    "RC#", // recording company's catalog number of album
    "RRD", // release date (**date format)
    "RLC", // place of recording
    "RNP", // producer's name
    "RDT", // date of recording (**date format)
    "RT#", // track number
    // Performance information (relevant when the Humdrum encodes, say, a MIDI performance)
    "MGN", // ensemble's name
    "MPN", // performer's name
    "MPS", // suspected performer
    "MRD", // date of performance (**date format)
    "MLC", // place of performance
    "MCN", // conductor's name
    "MPD", // date of first performance (**date format)
    "MDT", // seen in the wild (another way to say date of performance?)
    "OTL", // title
    "OTP", // popular title
    "OTA", // alternative title
    "OPR", // title of parent work
    "OAC", // act number (e.g. '2' or 'Act 2')
    "OSC", // scene number (e.g. '3' or 'Scene 3')
    "OMV", // movement number (e.g. '4', or 'mov. 4', or...)
    "OMD", // movement name
    "OPS", // opus number (e.g. '23', or 'Opus 23')
    "ONM", // number (e.g. number of song within ABC multi-song file)
    "OVM", // volume number (e.g. '6' or 'Vol. 6')
    "ODE", // dedicated to
    "OCO", // commissioned by
    "OCL", // collected/transcribed by
    "ONB", // free form note related to title or identity of work
    "ODT", // date or period of composition (**date or **zeit format)
    "OCY", // country of composition
    "OPC", // city, town, or village of composition
    "GTL", // group title (e.g. 'The Seasons')
    "GAW", // associated work, such as a play or film
    "GCO", // collection designation
    "PUB", // publication status 'published'/'unpublished'
    "PED", // publication editor
    "PPR", // first publisher
    "PDT", // date first published (**date format)
    "PTL", // publication (volume) title
    "PPP", // place first published
    "PC#", // publisher's catalog number (NOT scholarly catalog)
    "SCT", // scholarly catalog abbrev/number (e.g. 'BWV 551')
    "SCA", // scholarly catalog (unabbreviated) (e.g. 'Koechel 117')
    "SMS", // unpublished manuscript source name
    "SML", // unpublished manuscript location
    "SMA", // acknowledgment of manuscript access
    "YEP", // publisher of electronic edition
    "YEC", // date and owner of electronic copyright
    "YER", // date electronic edition released
    "YEM", // copyright message (e.g. 'All rights reserved')
    "YEN", // country of copyright
    "YOR", // original document from which encoded doc was prepared
    "YOO", // original document owner
    "YOY", // original copyright year
    "YOE", // original editor
    "EED", // electronic editor
    "ENC", // electronic encoder (person)
    "END", // encoding date
    "EMD", // electronic document modification description (one/mod)
    "EEV", // electronic edition version
    "EFL", // file number e.g. '1/4' for one of four
    "EST", // encoding status (usually deleted before distribution)
    "VTS", // checksum (excluding the VTS line itself)
    "ACO", // collection designation
    "AFR", // form designation
    "AGN", // genre designation
    "AST", // style, period, or type of work designation
    "AMD", // mode classification e.g. '5; Lydian'
    "AMT", // metric classification, must be one of eight names, e.g. 'simple quadruple'
    "AIN", // instrumentation, must be alphabetical list of *I abbrevs, space-delimited
    "ARE", // geographical region of origin (list of 'narrowing down' names of regions)
    "ARL", // geographical location of origin (lat/long)
    "HAO", // aural history (lots of text, stories about the work)
    "HTX", // freeform translation of vocal text
    "RLN", // Extended ASCII language code
    "RNB", // a note about the representation
    "RWB" // a warning about the representation
};

#endif /* NO_HUMDRUM_SUPPORT */
};

Expand Down
Loading

0 comments on commit 58675e7

Please sign in to comment.