diff --git a/datacite/datacite.go b/datacite/datacite.go index 4fe21ee..fa69747 100644 --- a/datacite/datacite.go +++ b/datacite/datacite.go @@ -1,6 +1,7 @@ package datacite import ( + "commonmeta/constants" "commonmeta/doiutils" "commonmeta/types" "commonmeta/utils" @@ -27,8 +28,8 @@ type Attributes struct { Prefix string `json:"prefix"` Suffix string `json:"suffix"` AlternateIdentifiers []struct { - Identifier string `json:"identifier"` - IdentifierType string `json:"identifierType"` + AlternateIdentifier string `json:"alternateIdentifier"` + AlternateIdentifierType string `json:"alternateIdentifierType"` } `json:"alternateIdentifiers"` Creators []Contributor `json:"creators"` Publisher string `json:"publisher"` @@ -156,52 +157,6 @@ var DCToCMTranslations = map[string]string{ "Other": "Other", } -// from commonmeta schema -var CommonmetaContributorRoles = []string{ - "Author", - "Editor", - "Chair", - "Reviewer", - "ReviewAssistant", - "StatsReviewer", - "ReviewerExternal", - "Reader", - "Translator", - "ContactPerson", - "DataCollector", - "DataManager", - "Distributor", - "HostingInstitution", - "Producer", - "ProjectLeader", - "ProjectManager", - "ProjectMember", - "RegistrationAgency", - "RegistrationAuthority", - "RelatedPerson", - "ResearchGroup", - "RightsHolder", - "Researcher", - "Sponsor", - "WorkPackageLeader", - "Conceptualization", - "DataCuration", - "FormalAnalysis", - "FundingAcquisition", - "Investigation", - "Methodology", - "ProjectAdministration", - "Resources", - "Software", - "Supervision", - "Validation", - "Visualization", - "WritingOriginalDraft", - "WritingReviewEditing", - "Maintainer", - "Other", -} - func FetchDatacite(str string) (types.Data, error) { var data types.Data id, ok := doiutils.ValidateDOI(str) @@ -273,88 +228,60 @@ func GetDatacite(pid string) (Content, error) { // read DataCite JSON response and return work struct in Commonmeta format func ReadDatacite(content Content) (types.Data, error) { var data = types.Data{} + var 
err error data.ID = doiutils.NormalizeDOI(content.Attributes.DOI) data.Type = DCToCMTranslations[content.Attributes.Types.ResourceTypeGeneral] - var err error - data.Identifiers = append(data.Identifiers, types.Identifier{ - Identifier: data.ID, - IdentifierType: "DOI", - }) - if len(content.Attributes.AlternateIdentifiers) > 0 { - for _, v := range content.Attributes.AlternateIdentifiers { - if content.Attributes.AlternateIdentifiers[0].Identifier != "" { - data.Identifiers = append(data.Identifiers, types.Identifier{ - Identifier: v.Identifier, - IdentifierType: v.IdentifierType, - }) - } - } - } + // ArchiveLocations not yet supported - data.AdditionalType = DCToCMTranslations[content.Attributes.Types.ResourceType] - if data.AdditionalType != "" { - data.Type = data.AdditionalType - data.AdditionalType = "" - } else { + // Support the additional types added in schema 4.4 + AdditionalType := DCToCMTranslations[content.Attributes.Types.ResourceType] + if AdditionalType != "" { + data.Type = AdditionalType + } else if content.Attributes.Types.ResourceType != "" { data.AdditionalType = content.Attributes.Types.ResourceType } - data.Url, err = utils.NormalizeUrl(content.Attributes.Url, true, false) - if err != nil { - log.Println(err) - } - - if len(content.Attributes.Creators) > 0 { - for _, v := range content.Attributes.Creators { - if v.Name != "" || v.GivenName != "" || v.FamilyName != "" { - contributor := GetContributor(v) - containsID := slices.ContainsFunc(data.Contributors, func(e types.Contributor) bool { - return e.ID != "" && e.ID == contributor.ID - }) - if containsID { - log.Printf("Contributor with ID %s already exists", contributor.ID) - } else { - data.Contributors = append(data.Contributors, contributor) - } - } - } + data.Container = types.Container{ + Identifier: content.Attributes.Container.Identifier, + IdentifierType: content.Attributes.Container.IdentifierType, + Type: content.Attributes.Container.Type, + Title: 
content.Attributes.Container.Title, + Volume: content.Attributes.Container.Volume, + Issue: content.Attributes.Container.Issue, + FirstPage: content.Attributes.Container.FirstPage, + LastPage: content.Attributes.Container.LastPage, + } - // merge creators and contributors - for _, v := range content.Attributes.Contributors { - if v.Name != "" || v.GivenName != "" || v.FamilyName != "" { - contributor := GetContributor(v) - containsID := slices.ContainsFunc(data.Contributors, func(e types.Contributor) bool { - return e.ID != "" && e.ID == contributor.ID - }) - if containsID { - log.Printf("Contributor with ID %s already exists", contributor.ID) - } else { - data.Contributors = append(data.Contributors, contributor) + for _, v := range content.Attributes.Creators { + if v.Name != "" || v.GivenName != "" || v.FamilyName != "" { + contributor := GetContributor(v) + containsID := slices.ContainsFunc(data.Contributors, func(e types.Contributor) bool { + return e.ID != "" && e.ID == contributor.ID + }) + if containsID { + log.Printf("Contributor with ID %s already exists", contributor.ID) + } else { + data.Contributors = append(data.Contributors, contributor) - } } } } - if len(content.Attributes.Titles) > 0 { - for _, v := range content.Attributes.Titles { - var t string - if slices.Contains([]string{"MainTitle", "Subtitle", "TranslatedTitle"}, v.TitleType) { - t = v.TitleType - } - data.Titles = append(data.Titles, types.Title{ - Title: v.Title, - Type: t, - Language: v.Lang, + // merge creators and contributors + for _, v := range content.Attributes.Contributors { + if v.Name != "" || v.GivenName != "" || v.FamilyName != "" { + contributor := GetContributor(v) + containsID := slices.ContainsFunc(data.Contributors, func(e types.Contributor) bool { + return e.ID != "" && e.ID == contributor.ID }) - } - } + if containsID { + log.Printf("Contributor with ID %s already exists", contributor.ID) + } else { + data.Contributors = append(data.Contributors, contributor) - if 
content.Attributes.Publisher != "" { - data.Publisher = types.Publisher{ - Name: content.Attributes.Publisher, + } } } @@ -395,45 +322,105 @@ func ReadDatacite(content Content) (types.Data, error) { data.Date.Other = v.Date } } - - data.Container = types.Container{ - Identifier: content.Attributes.Container.Identifier, - IdentifierType: content.Attributes.Container.IdentifierType, - Type: content.Attributes.Container.Type, - Title: content.Attributes.Container.Title, - Volume: content.Attributes.Container.Volume, - Issue: content.Attributes.Container.Issue, - FirstPage: content.Attributes.Container.FirstPage, - LastPage: content.Attributes.Container.LastPage, + if data.Date.Published == "" { + data.Date.Published = strconv.Itoa(content.Attributes.PublicationYear) } - if len(content.Attributes.Descriptions) > 0 { - for _, v := range content.Attributes.Descriptions { - var t string - if slices.Contains([]string{"Abstract", "Summary", "Methods", "TechnicalInfo", "Other"}, v.DescriptionType) { - t = v.DescriptionType - } else { - t = "Other" - } - description := utils.Sanitize(v.Description) - data.Descriptions = append(data.Descriptions, types.Description{ - Description: description, - Type: t, - Language: v.Lang, - }) + for _, v := range content.Attributes.Descriptions { + var t string + if slices.Contains([]string{"Abstract", "Summary", "Methods", "TechnicalInfo", "Other"}, v.DescriptionType) { + t = v.DescriptionType + } else { + t = "Other" } + description := utils.Sanitize(v.Description) + data.Descriptions = append(data.Descriptions, types.Description{ + Description: description, + Type: t, + Language: v.Lang, + }) + } + + // Files not yet supported. 
Sizes and formats are part of the file object, + // but can't be mapped directly + + for _, v := range content.Attributes.FundingReferences { + data.FundingReferences = append(data.FundingReferences, types.FundingReference{ + FunderIdentifier: v.FunderIdentifier, + FunderIdentifierType: v.FunderIdentifierType, + FunderName: v.FunderName, + AwardNumber: v.AwardNumber, + AwardURI: v.AwardURI, + }) } - if len(content.Attributes.Subjects) > 0 { - for _, v := range content.Attributes.Subjects { - subject := types.Subject{ - Subject: v.Subject, + for _, v := range content.Attributes.GeoLocations { + data.GeoLocations = append(data.GeoLocations, types.GeoLocation{ + GeoLocationPoint: types.GeoLocationPoint{ + PointLongitude: v.GeoLocationPoint.PointLongitude, + PointLatitude: v.GeoLocationPoint.PointLatitude, + }, + GeoLocationPlace: v.GeoLocationPlace, + GeoLocationBox: types.GeoLocationBox{ + EastBoundLongitude: v.GeoLocationBox.EastBoundLongitude, + WestBoundLongitude: v.GeoLocationBox.WestBoundLongitude, + SouthBoundLatitude: v.GeoLocationBox.SouthBoundLatitude, + NorthBoundLatitude: v.GeoLocationBox.NorthBoundLatitude, + }, + }) + } + + if len(content.Attributes.AlternateIdentifiers) > 0 { + supportedIdentifiers := []string{ + "ARK", + "arXiv", + "Bibcode", + "DOI", + "Handle", + "ISBN", + "ISSN", + "PMID", + "PMCID", + "PURL", + "URL", + "URN", + "Other", + } + for _, v := range content.Attributes.AlternateIdentifiers { + identifierType := "Other" + if slices.Contains(supportedIdentifiers, v.AlternateIdentifierType) { + identifierType = v.AlternateIdentifierType } - if !slices.Contains(data.Subjects, subject) { - data.Subjects = append(data.Subjects, subject) + if v.AlternateIdentifier != "" { + data.Identifiers = append(data.Identifiers, types.Identifier{ + Identifier: v.AlternateIdentifier, + IdentifierType: identifierType, + }) } } } + data.Identifiers = append(data.Identifiers, types.Identifier{ + Identifier: data.ID, + IdentifierType: "DOI", + }) + if 
len(data.Identifiers) > 1 { + data.Identifiers = utils.DedupeSlice(data.Identifiers) + } + + if content.Attributes.Publisher != "" { + data.Publisher = types.Publisher{ + Name: content.Attributes.Publisher, + } + } + + for _, v := range content.Attributes.Subjects { + subject := types.Subject{ + Subject: v.Subject, + } + if !slices.Contains(data.Subjects, subject) { + data.Subjects = append(data.Subjects, subject) + } + } data.Language = content.Attributes.Language @@ -449,7 +436,7 @@ func ReadDatacite(content Content) (types.Data, error) { } } - data.Version = content.Attributes.Version + data.Provider = "DataCite" if len(content.Attributes.RelatedIdentifiers) > 0 { supportedRelations := []string{ @@ -502,44 +489,24 @@ func ReadDatacite(content Content) (types.Data, error) { } } - if len(content.Attributes.FundingReferences) > 0 { - for _, v := range content.Attributes.FundingReferences { - data.FundingReferences = append(data.FundingReferences, types.FundingReference{ - FunderIdentifier: v.FunderIdentifier, - FunderIdentifierType: v.FunderIdentifierType, - FunderName: v.FunderName, - AwardNumber: v.AwardNumber, - AwardURI: v.AwardURI, - }) - } - } else { - data.FundingReferences = []types.FundingReference{} - } - - if len(content.Attributes.GeoLocations) > 0 { - for _, v := range content.Attributes.GeoLocations { - data.GeoLocations = append(data.GeoLocations, types.GeoLocation{ - GeoLocationPoint: types.GeoLocationPoint{ - PointLongitude: v.GeoLocationPoint.PointLongitude, - PointLatitude: v.GeoLocationPoint.PointLatitude, - }, - GeoLocationPlace: v.GeoLocationPlace, - GeoLocationBox: types.GeoLocationBox{ - EastBoundLongitude: v.GeoLocationBox.EastBoundLongitude, - WestBoundLongitude: v.GeoLocationBox.WestBoundLongitude, - SouthBoundLatitude: v.GeoLocationBox.SouthBoundLatitude, - NorthBoundLatitude: v.GeoLocationBox.NorthBoundLatitude, - }, - }) + for _, v := range content.Attributes.Titles { + var t string + if slices.Contains([]string{"MainTitle", 
"Subtitle", "TranslatedTitle"}, v.TitleType) { + t = v.TitleType } + data.Titles = append(data.Titles, types.Title{ + Title: v.Title, + Type: t, + Language: v.Lang, + }) } - data.Files = []types.File{} - // sizes and formats are part of the file object, but can't be mapped directly - - data.ArchiveLocations = []string{} + data.Url, err = utils.NormalizeUrl(content.Attributes.Url, true, false) + if err != nil { + log.Println(err) + } - data.Provider = "DataCite" + data.Version = content.Attributes.Version return data, nil } @@ -587,7 +554,7 @@ func GetContributor(v Contributor) types.Contributor { }) } var roles []string - if slices.Contains(CommonmetaContributorRoles, v.ContributorType) { + if slices.Contains(constants.ContributorRoles, v.ContributorType) { roles = append(roles, v.ContributorType) } else { roles = append(roles, "Author") diff --git a/datacite/datacite_test.go b/datacite/datacite_test.go index 3cf5658..e91252f 100644 --- a/datacite/datacite_test.go +++ b/datacite/datacite_test.go @@ -94,24 +94,24 @@ func TestFetchDatacite(t *testing.T) { } } -func TestGetDataciteSample(t *testing.T) { - t.Parallel() +// func TestGetDataciteSample(t *testing.T) { +// t.Parallel() - type testCase struct { - number int - want string - } +// type testCase struct { +// number int +// want string +// } - testCases := []testCase{ - {number: 10, want: "https://api.datacite.org/works?query=member:340,type:journal-article&rows=10"}, - } - for _, tc := range testCases { - got, err := datacite.GetDataciteSample(tc.number) - if err != nil { - t.Errorf("Datacite Sample(%v): error %v", tc.number, err) - } - if diff := cmp.Diff(tc.want, got); diff != "" { - t.Errorf("DataciteApiSampleUrl mismatch (-want +got):\n%s", diff) - } - } -} +// testCases := []testCase{ +// {number: 10, want: "https://api.datacite.org/works?query=member:340,type:journal-article&rows=10"}, +// } +// for _, tc := range testCases { +// got, err := datacite.GetDataciteSample(tc.number) +// if err != nil { +// 
t.Errorf("Datacite Sample(%v): error %v", tc.number, err) +// } +// if diff := cmp.Diff(tc.want, got); diff != "" { +// t.Errorf("DataciteApiSampleUrl mismatch (-want +got):\n%s", diff) +// } +// } +// } diff --git a/datacite/testdata/10.5061_dryad.8515.json b/datacite/testdata/10.5061_dryad.8515.json index 541131d..e8382a8 100644 --- a/datacite/testdata/10.5061_dryad.8515.json +++ b/datacite/testdata/10.5061_dryad.8515.json @@ -8,7 +8,7 @@ "contributorRoles": ["Author"], "givenName": "Benjamin", "familyName": "Ollomo", - "affiliation": [ + "affiliations": [ { "name": "Centre International de Recherches Médicales de Franceville" } @@ -19,7 +19,7 @@ "contributorRoles": ["Author"], "givenName": "Patrick", "familyName": "Durand", - "affiliation": [ + "affiliations": [ { "name": "French National Centre for Scientific Research" } ] }, @@ -28,7 +28,7 @@ "contributorRoles": ["Author"], "givenName": "Franck", "familyName": "Prugnolle", - "affiliation": [ + "affiliations": [ { "name": "French National Centre for Scientific Research" } ] }, @@ -43,7 +43,7 @@ "contributorRoles": ["Author"], "givenName": "Céline", "familyName": "Arnathau", - "affiliation": [ + "affiliations": [ { "name": "French National Centre for Scientific Research" } ] }, @@ -52,7 +52,7 @@ "contributorRoles": ["Author"], "givenName": "Dieudonné", "familyName": "Nkoghe", - "affiliation": [ + "affiliations": [ { "name": "Centre International de Recherches Médicales de Franceville" } @@ -63,7 +63,7 @@ "contributorRoles": ["Author"], "givenName": "Eric", "familyName": "Leroy", - "affiliation": [ + "affiliations": [ { "name": "Centre International de Recherches Médicales de Franceville" } @@ -74,7 +74,7 @@ "contributorRoles": ["Author"], "givenName": "François", "familyName": "Renaud", - "affiliation": [ + "affiliations": [ { "name": "French National Centre for Scientific Research" } ] } diff --git a/datacite/testdata/10.5438_zhyx-n122.json b/datacite/testdata/10.5438_zhyx-n122.json index b9c93f9..b4e408c 
100644 --- a/datacite/testdata/10.5438_zhyx-n122.json +++ b/datacite/testdata/10.5438_zhyx-n122.json @@ -8,7 +8,7 @@ "contributorRoles": ["Author"], "givenName": "Rorie", "familyName": "Edmunds", - "affiliation": [{ "name": "DataCite" }] + "affiliations": [{ "name": "DataCite" }] }, { "id": "https://orcid.org/0000-0003-4448-3844", @@ -16,7 +16,7 @@ "contributorRoles": ["Author"], "givenName": "Paul", "familyName": "Vierkant", - "affiliation": [{ "name": "DataCite" }] + "affiliations": [{ "name": "DataCite" }] } ], "date": { "published": "2023" }, diff --git a/datacite/testdata/10.6071_z7wc73.json b/datacite/testdata/10.6071_z7wc73.json index 127973e..59d3fc1 100644 --- a/datacite/testdata/10.6071_z7wc73.json +++ b/datacite/testdata/10.6071_z7wc73.json @@ -9,14 +9,14 @@ "contributorRoles": ["Author"], "givenName": "Roger", "familyName": "Bales", - "affiliation": [{ "name": "University of California, Merced" }] + "affiliations": [{ "name": "University of California, Merced" }] }, { "type": "Person", "contributorRoles": ["Author"], "givenName": "Matt", "familyName": "Meadows", - "affiliation": [{ "name": "University of California, Merced" }] + "affiliations": [{ "name": "University of California, Merced" }] }, { "id": "https://orcid.org/0000-0002-8862-1404", @@ -24,7 +24,7 @@ "contributorRoles": ["Author"], "givenName": "Erin", "familyName": "Stacy", - "affiliation": [{ "name": "University of California, Merced" }] + "affiliations": [{ "name": "University of California, Merced" }] }, { "id": "https://orcid.org/0000-0002-9627-2427", @@ -32,7 +32,7 @@ "contributorRoles": ["Author"], "givenName": "Martha", "familyName": "Conklin", - "affiliation": [{ "name": "University of California, Merced" }] + "affiliations": [{ "name": "University of California, Merced" }] }, { "id": "https://orcid.org/0000-0001-5344-0673", @@ -40,14 +40,14 @@ "contributorRoles": ["Author"], "givenName": "Xiande", "familyName": "Meng", - "affiliation": [{ "name": "University of California, Merced" 
}] + "affiliations": [{ "name": "University of California, Merced" }] }, { "type": "Person", "contributorRoles": ["Author"], "givenName": "SSCZO", "familyName": "Southern Sierra Critical Zone Observatory", - "affiliation": [{ "name": "National Science Foundation" }] + "affiliations": [{ "name": "National Science Foundation" }] }, { "type": "Person", diff --git a/types/types.go b/types/types.go index c5b4f74..575151d 100644 --- a/types/types.go +++ b/types/types.go @@ -168,15 +168,15 @@ type Data struct { Type string `db:"type" json:"type"` // optional fields - AdditionalType string `db:"additional_type" json:"additional_type,omitempty"` - ArchiveLocations []string `db:"archive_locations" json:"archive_locations,omitempty"` + AdditionalType string `db:"additional_type" json:"additionalType,omitempty"` + ArchiveLocations []string `db:"archive_locations" json:"archiveLocations,omitempty"` Container Container `db:"container" json:"container,omitempty"` Contributors []Contributor `db:"contributors" json:"contributors"` Date Date `db:"date" json:"date,omitempty"` Descriptions []Description `db:"descriptions" json:"descriptions,omitempty"` Files []File `db:"files" json:"files,omitempty"` - FundingReferences []FundingReference `db:"funding_references" json:"funding_references,omitempty"` - GeoLocations []GeoLocation `db:"geo_locations" json:"geo_locations,omitempty"` + FundingReferences []FundingReference `db:"funding_references" json:"fundingReferences,omitempty"` + GeoLocations []GeoLocation `db:"geo_locations" json:"geoLocations,omitempty"` Identifiers []Identifier `db:"identifiers" json:"identifiers,omitempty"` Language string `db:"language" json:"language,omitempty"` License License `db:"license" json:"license,omitempty"` @@ -212,7 +212,7 @@ type Contributor struct { Name string `json:"name,omitempty"` GivenName string `json:"givenName,omitempty"` FamilyName string `json:"familyName,omitempty"` - Affiliations []Affiliation `json:"affiliations"` + Affiliations 
[]Affiliation `json:"affiliations,omitempty"` ContributorRoles []string `json:"contributorRoles,omitempty"` }