From 2a28c90cb3456ad0a7c0c93e554b80182fa06383 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Thu, 4 Jan 2024 16:31:58 -0600 Subject: [PATCH 01/26] feat: add generalized entity model for v2 search Issue: https://github.com/moov-io/watchman/issues/527 --- pkg/search/models.go | 166 ++++++++++++++++++++++++++++++++++++++ pkg/search/models_test.go | 38 +++++++++ 2 files changed, 204 insertions(+) create mode 100644 pkg/search/models.go create mode 100644 pkg/search/models_test.go diff --git a/pkg/search/models.go b/pkg/search/models.go new file mode 100644 index 00000000..3e3171d2 --- /dev/null +++ b/pkg/search/models.go @@ -0,0 +1,166 @@ +package search + +import "time" + +type Entity[T any] struct { + Name string `json:"name"` + Type EntityType `json:"entityType"` + Source SourceList `json:"sourceList"` + + // TODO(adam): What has opensanctions done to normalize and join this data + // Review https://www.opensanctions.org/reference/ + + Person *Person `json:"person"` + Business *Business `json:"business"` + Organization *Organization `json:"organization"` + Aircraft *Aircraft `json:"aircraft"` + Vessel *Vessel `json:"vessel"` + + CryptoAddresses []CryptoAddress `json:"cryptoAddresses"` + + Addresses []Address `json:"addresses"` + + SourceData T `json:"sourceData"` // Contains all original list data with source list naming +} + +type EntityType string + +var ( + EntityPerson EntityType = "person" + EntityBusiness EntityType = "business" + EntityAircraft EntityType = "aircraft" + EntityVessel EntityType = "vessel" + EntityCryptoAddress EntityType = "crypto-address" +) + +type SourceList string + +var ( + SourceEUCSL SourceList = "eu_csl" + SourceUKCSL SourceList = "uk_csl" + SourceUSCSL SourceList = "us_csl" + SourceUSOFAC SourceList = "us_ofac" +) + +type Person struct { + Name string `json:"name"` + Gender Gender `json:"gender"` + BirthDate *time.Time `json:"birthDate"` + DeathDate *time.Time `json:"deathDate"` + + GovernmentIDs []GovernmentID 
`json:"governmentIDs"` +} + +type Gender string + +var ( + GenderUnknown Gender = "unknown" + GenderMale Gender = "male" + GenderFemale Gender = "female" +) + +type GovernmentID struct { + Type GovernmentIDType `json:"type"` + Identifier string `json:"identifier"` +} + +type GovernmentIDType string + +var ( + GovernmentIDPassport GovernmentIDType = "passport" +) + +type Business struct { + Name string `json:"name"` + Created *time.Time `json:"created"` + Dissolved *time.Time `json:"dissolved"` + Identifier []Identifier `json:"identifier"` +} + +// Identifier +// +// TODO(adam): Look at OpenSanctions for tax ID codes +// https://www.opensanctions.org/reference/#schema.Company +type Identifier struct { + Type IdentifierType `json:"type"` + Identifier string `json:"value"` +} + +type IdentifierType string + +var ( + Identifier_US_EIN IdentifierType = "us_ein" + Identifier_US_SSN IdentifierType = "us_ssn" +) + +// Organization +// +// TODO(adam): https://www.opensanctions.org/reference/#schema.Organization +type Organization struct { + Name string `json:"name"` + Created *time.Time `json:"created"` + Dissolved *time.Time `json:"dissolved"` + Identifier []Identifier `json:"identifier"` +} + +type Aircraft struct { + Name string `json:"name"` + Type AircraftType `json:"type"` + Flag string `json:"flag"` // ISO-3166 + Built *time.Time `json:"built"` + ICAOCode string `json:"icaoCode"` // ICAO aircraft type designator + Model string `json:"model"` + SerialNumber string `json:"serialNumber"` +} + +type AircraftType string + +var ( + AircraftTypeUnknown AircraftType = "unknown" + AircraftCargo AircraftType = "cargo" +) + +// Vessel +// +// TODO(adam): https://www.opensanctions.org/reference/#schema.Vessel +type Vessel struct { + Name string `json:"name"` + IMONumber string `json:"imoNumber"` + Type VesselType `json:"type"` + Flag string `json:"flag"` // ISO-3166 + Built *time.Time `json:"built"` + Model string `json:"model"` + Tonnage int `json:"tonnage"` + MMSI string 
`json:"mmsi"` // Maritime Mobile Service Identity +} + +type VesselType string + +var ( + VesselTypeUnknown VesselType = "unknown" + VesselTypeCargo VesselType = "cargo" +) + +type CryptoAddress struct { + Currency string `json:"currency"` + Address string `json:"address"` +} + +// Address is a struct which represents any physical location +// +// TODO(adam): Should probably adopt something like libpostal's naming +// https://github.com/openvenues/libpostal?tab=readme-ov-file#parser-labels +// +// Or OpenSanctions +// https://www.opensanctions.org/reference/#schema.Address +type Address struct { + Line1 string `json:"line1"` + Line2 string `json:"line2"` + City string `json:"city"` + PostalCode string `json:"postalCode"` + State string `json:"state"` + Country string `json:"country"` // ISO-3166 code + + Latitude float64 `json:"latitude"` + Longitude float64 `json:"longitude"` +} diff --git a/pkg/search/models_test.go b/pkg/search/models_test.go new file mode 100644 index 00000000..a5e6a9df --- /dev/null +++ b/pkg/search/models_test.go @@ -0,0 +1,38 @@ +package search + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestEntityJSON(t *testing.T) { + type SDN struct { + EntityID string `json:"entityID"` + } + bs, err := json.MarshalIndent(Entity[SDN]{ + SourceData: SDN{ + EntityID: "12345", + }, + }, "", " ") + require.NoError(t, err) + + expected := strings.TrimSpace(`{ + "name": "", + "entityType": "", + "sourceList": "", + "person": null, + "business": null, + "organization": null, + "aircraft": null, + "vessel": null, + "cryptoAddresses": null, + "addresses": null, + "sourceData": { + "entityID": "12345" + } +}`) + require.Equal(t, expected, string(bs)) +} From 9582e1005c2a462f6018e25a3a74e18678ad9e94 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Thu, 4 Jan 2024 17:57:48 -0600 Subject: [PATCH 02/26] search/ofac: start parsing out remarks field, map to v2 generalized models --- pkg/ofac/mapper.go | 84 
+++++++++++++++++++++++++++++++++++++++++ pkg/ofac/reader.go | 26 ++++++++++++- pkg/ofac/reader_test.go | 45 ++++++++++++++++++++++ pkg/search/models.go | 18 ++++++++- 4 files changed, 171 insertions(+), 2 deletions(-) create mode 100644 pkg/ofac/mapper.go diff --git a/pkg/ofac/mapper.go b/pkg/ofac/mapper.go new file mode 100644 index 00000000..7c084c9b --- /dev/null +++ b/pkg/ofac/mapper.go @@ -0,0 +1,84 @@ +package ofac + +import ( + "strings" + + "github.com/moov-io/watchman/pkg/search" +) + +func PtrToEntity(sdn *SDN) search.Entity[SDN] { + if sdn != nil { + return ToEntity(*sdn) + } + return search.Entity[SDN]{} +} + +// TODO(adam): Accept Addresses, Alts, Comments + +func ToEntity(sdn SDN) search.Entity[SDN] { + out := search.Entity[SDN]{ + Name: sdn.SDNName, + Source: search.SourceUSOFAC, + SourceData: sdn, + } + + switch strings.ToLower(strings.TrimSpace(sdn.SDNType)) { + case "-0-", "": + out.Type = search.EntityBusiness // TODO(adam): or EntityOrganization + // TODO(adam): How to tell Business vs Organization ? + + case "individual": + out.Type = search.EntityPerson + out.Person = &search.Person{ + Name: sdn.SDNName, + } + + // TODO(adam): + // DOB 02 Aug 1991; + // nationality Russia; + // Gender Male; + // Passport 0291622 (Belize); + + case "vessel": + out.Type = search.EntityVessel + out.Vessel = &search.Vessel{ + Name: sdn.SDNName, + + // IMONumber string `json:"imoNumber"` + // Type VesselType `json:"type"` + // Flag string `json:"flag"` // ISO-3166 + // Built *time.Time `json:"built"` + // Model string `json:"model"` + // Tonnage int `json:"tonnage"` // TODO(adam): remove , and ParseInt + // MMSI string `json:"mmsi"` // Maritime Mobile Service Identity + } + + // TODO(adam): + // Vessel Registration Identification IMO 9569712; + // MMSI 572469210; + // + // Former Vessel Flag None Identified; alt. 
Former Vessel Flag Tanzania; + + case "aircraft": + out.Type = search.EntityAircraft + out.Aircraft = &search.Aircraft{ + Name: sdn.SDNName, + + // Type AircraftType `json:"type"` + // Flag string `json:"flag"` // ISO-3166 + // Built *time.Time `json:"built"` + // ICAOCode string `json:"icaoCode"` // ICAO aircraft type designator + // Model string `json:"model"` + // SerialNumber string `json:"serialNumber"` + } + + // TODO(adam): + // Aircraft Construction Number (also called L/N or S/N or F/N) 8401; + // Aircraft Manufacture Date 1992; + // Aircraft Model IL76-TD; + // Aircraft Operator YAS AIR; + // Aircraft Manufacturer's Serial Number (MSN) 1023409321; + } + + return out +} diff --git a/pkg/ofac/reader.go b/pkg/ofac/reader.go index 1c9fc3e1..e6192412 100644 --- a/pkg/ofac/reader.go +++ b/pkg/ofac/reader.go @@ -252,6 +252,30 @@ func splitPrograms(in string) []string { return strings.Split(norm, "; ") } +func splitRemarks(input string) []string { + return strings.Split(input, ";") +} + +func findRemarkValues(remarks []string, suffix string) []string { + var out []string + if suffix == "" { + return out + } + for i := range remarks { + idx := strings.Index(remarks[i], suffix) + if idx == -1 { + continue // not found + } + + value := remarks[i][idx+len(suffix):] + value = strings.TrimPrefix(value, ":") // identifiers can end with a colon + value = strings.TrimSuffix(value, ";") + value = strings.TrimSuffix(value, ".") + out = append(out, strings.TrimSpace(value)) + } + return out +} + var ( digitalCurrencies = []string{ "XBT", // Bitcoin @@ -281,7 +305,7 @@ func readDigitalCurrencyAddresses(remarks string) []DigitalCurrencyAddress { // // alt. 
Digital Currency Address - XBT 12jVCWW1ZhTLA5yVnroEJswqKwsfiZKsax; // - parts := strings.Split(remarks, ";") + parts := splitRemarks(remarks) for i := range parts { // Check if the currency is in the remark var addressIndex int diff --git a/pkg/ofac/reader_test.go b/pkg/ofac/reader_test.go index d3a60f74..32ca3bf5 100644 --- a/pkg/ofac/reader_test.go +++ b/pkg/ofac/reader_test.go @@ -120,6 +120,51 @@ func TestSDNComments(t *testing.T) { } } +func TestSDN__remarks(t *testing.T) { + // individual + remarks := splitRemarks("DOB 12 Oct 1972; POB Corozal, Belize; Passport 0291622 (Belize); Linked To: D'S SUPERMARKET COMPANY LTD.") + expected := []string{"12 Oct 1972"} + require.ElementsMatch(t, expected, findRemarkValues(remarks, "DOB")) + expected = []string{"0291622 (Belize)"} + require.ElementsMatch(t, expected, findRemarkValues(remarks, "Passport")) + + // Contact info + remarks = splitRemarks("Website www.nitc.co.ir; Email Address info@nitc.co.ir; alt. Email Address administrator@nitc.co.ir; IFCA Determination - Involved in the Shipping Sector; Additional Sanctions Information - Subject to Secondary Sanctions; Telephone (98)(21)(66153220); Telephone (98)(21)(23803202); Telephone (98)(21)(23803303); Telephone (98)(21)(66153224); Telephone (98)(21)(23802230); Telephone (98)(9121115315); Telephone (98)(9128091642); Telephone (98)(9127389031); Fax (98)(21)(22224537); Fax (98)(21)(23803318); Fax (98)(21)(22013392); Fax (98)(21)(22058763).") + expected = []string{"www.nitc.co.ir"} + require.ElementsMatch(t, expected, findRemarkValues(remarks, "Website")) + expected = []string{"info@nitc.co.ir", "administrator@nitc.co.ir"} + require.ElementsMatch(t, expected, findRemarkValues(remarks, "Email Address")) + expected = []string{"(98)(21)(66153220)", "(98)(21)(23803202)", "(98)(21)(23803303)", "(98)(21)(66153224)", "(98)(21)(23802230)", "(98)(9121115315)", "(98)(9128091642)", "(98)(9127389031)"} + require.ElementsMatch(t, expected, findRemarkValues(remarks, "Telephone")) + 
expected = []string{"(98)(21)(22224537)", "(98)(21)(23803318)", "(98)(21)(22013392)", "(98)(21)(22058763)"} + require.ElementsMatch(t, expected, findRemarkValues(remarks, "Fax")) + + // Vessel + remarks = splitRemarks("Former Vessel Flag Malta; alt. Former Vessel Flag Tuvalu; alt. Former Vessel Flag None Identified; alt. Former Vessel Flag Tanzania; Additional Sanctions Information - Subject to Secondary Sanctions; Vessel Registration Identification IMO 9187629; MMSI 572469210; Linked To: NATIONAL IRANIAN TANKER COMPANY.") + expected = []string{"9187629"} + require.ElementsMatch(t, expected, findRemarkValues(remarks, "Vessel Registration Identification IMO")) + expected = []string{"572469210"} + require.ElementsMatch(t, expected, findRemarkValues(remarks, "MMSI")) + + // Aircraft + remarks = splitRemarks("Aircraft Construction Number (also called L/N or S/N or F/N) 8401; Aircraft Manufacture Date 1992; Aircraft Model IL76-TD; Aircraft Operator YAS AIR; Aircraft Manufacturer's Serial Number (MSN) 1023409321; Linked To: POUYA AIR.") + expected = []string{"1992"} + require.ElementsMatch(t, expected, findRemarkValues(remarks, "Manufacture Date")) + expected = []string{"IL76-TD"} + require.ElementsMatch(t, expected, findRemarkValues(remarks, "Model")) + expected = []string{"(MSN) 1023409321"} + require.ElementsMatch(t, expected, findRemarkValues(remarks, "Serial Number")) + + t.Run("error conditions", func(t *testing.T) { + remarks = splitRemarks("") + require.Len(t, findRemarkValues(remarks, ""), 0) + require.Len(t, findRemarkValues(remarks, "DOB"), 0) + + remarks = splitRemarks(" ; ;;;;; ; ;") + require.Len(t, findRemarkValues(remarks, "DOB"), 0) + }) +} + func TestSDNComments_CryptoCurrencies(t *testing.T) { fd, err := os.CreateTemp("", "sdn-comments") require.NoError(t, err) diff --git a/pkg/search/models.go b/pkg/search/models.go index 3e3171d2..26eb604f 100644 --- a/pkg/search/models.go +++ b/pkg/search/models.go @@ -30,7 +30,7 @@ var ( EntityBusiness EntityType = 
"business" EntityAircraft EntityType = "aircraft" EntityVessel EntityType = "vessel" - EntityCryptoAddress EntityType = "crypto-address" + EntityCryptoAddress EntityType = "crypto-address" // TODO(adam): Does this make sense? ) type SourceList string @@ -59,8 +59,24 @@ var ( GenderFemale Gender = "female" ) +type ContactInfo struct { + EmailAddresses []string + PhoneNumbers []string + FaxNumbers []string +} + +// TODO(adam): +// +// Website www.tidewaterco.com; +// Email Address info@tidewaterco.com; alt. Email Address info@tidewaterco.ir; +// Telephone: 982188553321; Alt. Telephone: 982188554432; +// Fax: 982188717367; Alt. Fax: 982188708761; +// +// 12803,"TIDEWATER MIDDLE EAST CO.",-0- ,"SDGT] [NPWMD] [IRGC] [IFSR] [IFCA",-0- ,-0- ,-0- ,-0- ,-0- ,-0- ,-0- ," alt. Email Address info@tidewaterco.ir; IFCA Determination - Port Operator; Additional Sanctions Information - Subject to Secondary Sanctions; Business Registration Document # 18745 (Iran); Alt. Fax: 982188708911." + type GovernmentID struct { Type GovernmentIDType `json:"type"` + Country string `json:"country"` // ISO-3166 Identifier string `json:"identifier"` } From 8d81443b5fb60c143748decc90ec4918eb2c8812 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Mon, 8 Jan 2024 13:29:02 -0600 Subject: [PATCH 03/26] ofac: work out mapper for SDN -> Entity[SDN] --- pkg/ofac/mapper.go | 209 ++++++++++++++++++++++++++++++++++++++-- pkg/ofac/mapper_test.go | 43 +++++++++ pkg/ofac/reader.go | 49 +++++++++- pkg/search/models.go | 18 ++-- 4 files changed, 297 insertions(+), 22 deletions(-) create mode 100644 pkg/ofac/mapper_test.go diff --git a/pkg/ofac/mapper.go b/pkg/ofac/mapper.go index 7c084c9b..34329e8d 100644 --- a/pkg/ofac/mapper.go +++ b/pkg/ofac/mapper.go @@ -1,7 +1,9 @@ package ofac import ( + "regexp" "strings" + "time" "github.com/moov-io/watchman/pkg/search" ) @@ -22,22 +24,56 @@ func ToEntity(sdn SDN) search.Entity[SDN] { SourceData: sdn, } + remarks := splitRemarks(sdn.Remarks) + switch 
strings.ToLower(strings.TrimSpace(sdn.SDNType)) { case "-0-", "": - out.Type = search.EntityBusiness // TODO(adam): or EntityOrganization - // TODO(adam): How to tell Business vs Organization ? + out.Type = search.EntityBusiness + // Set properties + out.Business = &search.Business{ + Name: sdn.SDNName, + } + out.Business.Identifier = makeIdentifiers(remarks, []string{ + "Branch Unit Number", + "Business Number", + "Business Registration Document", + "Business Registration Number", + "Certificate of Incorporation Number", + "Chamber of Commerce Number", + "Chinese Commercial Code", + "Registered Charity No.", + }) case "individual": out.Type = search.EntityPerson out.Person = &search.Person{ - Name: sdn.SDNName, + Name: sdn.SDNName, + Gender: search.Gender(strings.ToLower(firstValue(findMatchingRemarks(remarks, "Gender")))), } + out.Person.BirthDate = withFirstP(findMatchingRemarks(remarks, "DOB"), func(in remark) *time.Time { + // TODO(adam): handle + // DOB 01 Apr 1950 + // DOB 01 Feb 1958 to 28 Feb 1958 + // DOB 1928 + // DOB 1929 to 1930 + // DOB Sep 1958 + // DOB circa 01 Jan 1961 + // DOB circa 1934 + // DOB circa 1979-1982 + + t, _ := time.Parse("02 Jan 2006", in.value) + return &t + }) // TODO(adam): - // DOB 02 Aug 1991; + // citizen Venezuela + // // nationality Russia; - // Gender Male; - // Passport 0291622 (Belize); + // nationality: Eritrean + // + // POB 'Adlun, Lebanon + // Alt. 
POB: Keren Eritrea + // POB Abadan, Iran case "vessel": out.Type = search.EntityVessel @@ -73,12 +109,171 @@ func ToEntity(sdn SDN) search.Entity[SDN] { } // TODO(adam): - // Aircraft Construction Number (also called L/N or S/N or F/N) 8401; + // Aircraft Construction Number (also called L/N or S/N or F/N) 10907; + // // Aircraft Manufacture Date 1992; + // Aircraft Manufacture Date 01 Dec 1981; + // Aircraft Manufacture Date Apr 1993; + // // Aircraft Model IL76-TD; + // Aircraft Model B.747-422 + // Aircraft Model Gulfstream 200 + // // Aircraft Operator YAS AIR; // Aircraft Manufacturer's Serial Number (MSN) 1023409321; + // + // Previous Aircraft Tail Number 2-WGLP } return out } + +var parenCountryRegex = regexp.MustCompile(`\(([\w\s]+)\)`) + +func makeIdentifier(remarks []string, suffix string) *search.Identifier { + found := findMatchingRemarks(remarks, suffix) + if len(found) == 0 { + return nil + } + + // Often the country is in parenthesis at the end, so let's look for that + // + // Business Number 51566843 (Hong Kong) + country := parenCountryRegex.FindString(found[0].value) + country = strings.TrimPrefix(strings.TrimSuffix(country, ")"), "(") + + return &search.Identifier{ + Name: found[0].fullName, + Country: country, // ISO-3166 // TODO(adam): + Identifier: found[0].value, + } +} + +func makeIdentifiers(remarks []string, needles []string) []search.Identifier { + var out []search.Identifier + for i := range needles { + if id := makeIdentifier(remarks, needles[i]); id != nil { + out = append(out, *id) + } + } + return out +} + +// TODO(adam): +// Drop "alt. " + +// ContactInfo +// Fax: 0097282858208. +// Fax No. (022) 7363196. +// Fax (356)(25990640) +// FAX 850 2 381 4431/4432 +// Alt. Fax: 9221227700019 +// +// Alt. Telephone: 982188554432 +// Telephone (356)(21241232) +// Telephone + 97165749996 +// Telephone +31 010-4951863 +// Telephone No. (022) 7363030 +// Telephone Number: (971) (4) (3248000). 
+// PHONE 850 2 18111 8204/8208 +// PHONE 850 2 18111 ext. 8221 +// Phone No. 263-4-486946 +// Phone Number 982188526300 +// +// EMAIL daesong@co.chesin.com. +// Email Address EnExchanger@gmail.com +// Email:adelb@shabakah.net.sa. +// info@sanabel.org.uk (email). +// +// Website Oboronlogistika.ru +// Website http://comitet.su/about/ +// http://www.saraproperties.co.uk (website). + +// a.k.a. 'ABU AHMAD ISHAB'. +// a.k.a. 'ZAMANI, Aziz Shah' + +// GovernmentIDs +// +// Cedula No. 94428531 (Colombia) +// Cedula No. 94487319 (Colombia) issued 31 Oct 1994 +// Birth Certificate Number 32270 (Iran) +// Bosnian Personal ID No. 1005967953038 +// British National Overseas Passport 750200421 (United Kingdom) +// C.I.F. B84758374 (Spain). +// C.R. No. 03-B-1620 +// C.R. No. J10/623/1997 (Romania) +// C.U.I.P. AOIR671020H1374898 (Mexico). +// C.U.I.T. 20-60357110-0 (Argentina) +// C.U.R.P. # HESU430525HBCRMR13 (Mexico) +// CNP (Personal Numerical Code) 7460301380011 (Romania) +// Cartilla de Servicio Militar Nacional 607092 (Mexico). +// Citizen's Card Number 210222198011096648 (China). +// Commercial Registry Number 0411518776478 (Iran) +// Commercial Registry Number CH-020.1.066.499-9 (Switzerland) +// Company ID: No. 59 531 at Commercial Registry of the Civil Court of First Instance at Baabda, Lebanon. +// Company Number 05527424 (United Kingdom) +// Company Number IMO 1991835. +// Credencial electoral 073855815496 (Mexico). +// D-U-N-S Number 33-843-5672 +// D.N.I. 00263695-T (Spain) +// Diplomatic Passport 00000017 (Yemen) issued 27 Oct 2008 expires 26 Oct 2014 +// Driver's License No. 04900377 (Moldova) issued 02 Jul 2004 +// Driver's License No. 07442833 (United States) expires 15 Mar 2016 +// Driver's License No. 1-1-22-07-00030905-3 (Guatemala) expires 2010. +// Driver's License No. M600161650080 (United States) issued 07 Apr 2006 expires 08 Jan 2011. +// Driver's License is issued by the State of Texas. +// Dubai Chamber of Commerce Membership No. 
123076 (United Arab Emirates). +// Electoral Registry No. 07385114 (Afghanistan). +// Enterprise Number 0430.033.662 (Belgium). +// Fiscal Code 9896460 (Romania). +// Folio Mercantil No. 10328 (Jalisco) (Mexico). +// Government Gazette Number 00132598 (Russia). +// I.F.E. 05116040222575 (Mexico). +// Identification Number 0-16 Reg 53089 (Guatemala) +// Immigration No. A38839964 (United States). +// Interpol: Red Notice. File No. 2009/3599. March 24, 2009. Orange Notice. File No. 2009/52/OS/CCC. February 10, 2009. +// Italian Fiscal Code BCHMHT69R13Z352T. +// Kenyan ID No. 12773667 +// LE Number 07541863 (Peru). +// Legal Entity Number 851683897 (Netherlands) +// License 1249 (Russia). +// Matricula Mercantil No 0000104026 (Colombia). +// N.I.E. X-1552120-B (Spain). +// NIT # 16215230-1 (Colombia). +// National Foreign ID Number 210602197107153012 (China) +// National ID No. (HWI)040182 (Burma) +// Passport #H0044232 (Iraq). +// Passport 00016161 (Yemen) issued 19 Jun 2012 expires 18 Jun 2018 +// Passport No.: 0310857, Eritrea, Issue Date 21 August 2006, Expire Date 20 August 2008) +// Personal ID Card 00246412491303975500493 (Slovenia) expires 17 Dec 2018 +// Pilot License Number 2326384 +// Public Registration Number 1021801434380. +// Public Registration Number 1041202 (Virgin Islands, British) +// R.F.C. # IES-870805 (Mexico). +// RFC AAIJ810808SX4 (Mexico) +// RIF # J-00317392-4 (Venezuela). +// RTN 01019995013319 (Honduras) +// RUC # 1008619-1-537654 (Panama). +// Refugee ID Card A88000043 (Moldova) issued 16 Dec 2005. +// Registration ID 0000421465 (Poland) +// Registration Number 1027700499903 (Russia) +// Residency Number 003-5506420-0100028 (Costa Rica). +// Romanian C.R. J23/242/2004 (Romania). +// Romanian Permanent Resident CAN 0125477 (Romania) issued 13 Jul 2007. +// Romanian Tax Registration 14637977 (Romania). 
+// SSN 156-92-9858 (United States) +// SSN 33-3208848-3 (Philippines) +// SWIFT/BIC AFABAFKA +// Stateless Person ID Card CC00200261 (Moldova) issued 09 Sep 2000 +// Stateless Person Passport C000375 (Moldova) issued 09 Sep 2000 +// Tax ID No. 002235933 (Canada). +// Trade License No. 04110179 (United Kingdom). +// Travel Document Number A0003900 (Germany) +// Turkish Identificiation Number 10298480866 (Turkey). +// U.S.A. Passport issued 21 Jun 1992 in Amman, Jordan. +// UAE Identification 784-1968-9720837-5 +// UK Company Number 01019769 (United Kingdom) +// US FEIN 000920912 (United States). +// United Social Credit Code Certificate (USCCC) 91420112711981060J (China) +// V.A.T. Number 0430.033.662 (Belgium) +// VisaNumberID 2024702 (Mexico). diff --git a/pkg/ofac/mapper_test.go b/pkg/ofac/mapper_test.go new file mode 100644 index 00000000..c1cbf054 --- /dev/null +++ b/pkg/ofac/mapper_test.go @@ -0,0 +1,43 @@ +package ofac + +import ( + "path/filepath" + "testing" + "time" + + "github.com/moov-io/watchman/pkg/search" + + "github.com/stretchr/testify/require" +) + +func TestMapper(t *testing.T) { + res, err := Read(filepath.Join("..", "..", "test", "testdata", "sdn.csv")) + require.NoError(t, err) + + var sdn *SDN + for i := range res.SDNs { + if res.SDNs[i].EntityID == "15102" { + sdn = res.SDNs[i] + } + } + require.NotNil(t, sdn) + + e := ToEntity(*sdn) + require.Equal(t, "MORENO, Daniel", e.Name) + require.Equal(t, search.EntityPerson, e.Type) + require.Equal(t, search.SourceUSOFAC, e.Source) + + require.NotNil(t, e.Person) + require.Equal(t, "MORENO, Daniel", e.Person.Name) + require.Equal(t, "", string(e.Person.Gender)) + require.Equal(t, "1972-10-12T00:00:00Z", e.Person.BirthDate.Format(time.RFC3339)) + require.Nil(t, e.Person.DeathDate) + require.Len(t, e.Person.GovernmentIDs, 0) + + require.Nil(t, e.Business) + require.Nil(t, e.Organization) + require.Nil(t, e.Aircraft) + require.Nil(t, e.Vessel) + + require.Equal(t, "15102", e.SourceData.EntityID) +} 
diff --git a/pkg/ofac/reader.go b/pkg/ofac/reader.go index e6192412..737de1f6 100644 --- a/pkg/ofac/reader.go +++ b/pkg/ofac/reader.go @@ -256,8 +256,14 @@ func splitRemarks(input string) []string { return strings.Split(input, ";") } -func findRemarkValues(remarks []string, suffix string) []string { - var out []string +type remark struct { + matchedName string + fullName string + value string +} + +func findMatchingRemarks(remarks []string, suffix string) []remark { + var out []remark if suffix == "" { return out } @@ -271,11 +277,48 @@ func findRemarkValues(remarks []string, suffix string) []string { value = strings.TrimPrefix(value, ":") // identifiers can end with a colon value = strings.TrimSuffix(value, ";") value = strings.TrimSuffix(value, ".") - out = append(out, strings.TrimSpace(value)) + + out = append(out, remark{ + matchedName: suffix, + fullName: remarks[i][:idx+len(suffix)], + value: strings.TrimSpace(value), + }) } return out } +func findRemarkValues(remarks []string, suffix string) []string { + found := findMatchingRemarks(remarks, suffix) + var out []string + for i := range found { + out = append(out, found[i].value) + } + return out +} + +func firstValue(values []remark) string { + if len(values) == 0 { + return "" + } + return values[0].value +} + +func withFirstF[T any](values []remark, f func(remark) T) T { + if len(values) == 0 { + var zero T + return zero + } + return f(values[0]) +} + +func withFirstP[T any](values []remark, f func(remark) *T) *T { + if len(values) == 0 { + var zero T + return &zero + } + return f(values[0]) +} + var ( digitalCurrencies = []string{ "XBT", // Bitcoin diff --git a/pkg/search/models.go b/pkg/search/models.go index 26eb604f..89a65960 100644 --- a/pkg/search/models.go +++ b/pkg/search/models.go @@ -76,7 +76,7 @@ type ContactInfo struct { type GovernmentID struct { Type GovernmentIDType `json:"type"` - Country string `json:"country"` // ISO-3166 + Country string `json:"country"` // ISO-3166 // TODO(adam): 
Identifier string `json:"identifier"` } @@ -98,17 +98,11 @@ type Business struct { // TODO(adam): Look at OpenSanctions for tax ID codes // https://www.opensanctions.org/reference/#schema.Company type Identifier struct { - Type IdentifierType `json:"type"` - Identifier string `json:"value"` + Name string `json:"string"` + Country string `json:"country"` // ISO-3166 // TODO(adam): + Identifier string `json:"value"` } -type IdentifierType string - -var ( - Identifier_US_EIN IdentifierType = "us_ein" - Identifier_US_SSN IdentifierType = "us_ssn" -) - // Organization // // TODO(adam): https://www.opensanctions.org/reference/#schema.Organization @@ -122,7 +116,7 @@ type Organization struct { type Aircraft struct { Name string `json:"name"` Type AircraftType `json:"type"` - Flag string `json:"flag"` // ISO-3166 + Flag string `json:"flag"` // ISO-3166 // TODO(adam): Built *time.Time `json:"built"` ICAOCode string `json:"icaoCode"` // ICAO aircraft type designator Model string `json:"model"` @@ -143,7 +137,7 @@ type Vessel struct { Name string `json:"name"` IMONumber string `json:"imoNumber"` Type VesselType `json:"type"` - Flag string `json:"flag"` // ISO-3166 + Flag string `json:"flag"` // ISO-3166 // TODO(adam): Built *time.Time `json:"built"` Model string `json:"model"` Tonnage int `json:"tonnage"` From bdf548afebb5ac9bf1e4c0854f00a63d5362e33b Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Fri, 12 Jan 2024 16:23:47 -0600 Subject: [PATCH 04/26] ofac: setup mapper for Aircraft and Vessels --- pkg/ofac/mapper.go | 105 +++++++++++++++++++++++++--------------- pkg/ofac/mapper_test.go | 92 ++++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 41 deletions(-) diff --git a/pkg/ofac/mapper.go b/pkg/ofac/mapper.go index 34329e8d..01e4014b 100644 --- a/pkg/ofac/mapper.go +++ b/pkg/ofac/mapper.go @@ -51,17 +51,10 @@ func ToEntity(sdn SDN) search.Entity[SDN] { Gender: search.Gender(strings.ToLower(firstValue(findMatchingRemarks(remarks, "Gender")))), } 
out.Person.BirthDate = withFirstP(findMatchingRemarks(remarks, "DOB"), func(in remark) *time.Time { - // TODO(adam): handle - // DOB 01 Apr 1950 - // DOB 01 Feb 1958 to 28 Feb 1958 - // DOB 1928 - // DOB 1929 to 1930 - // DOB Sep 1958 - // DOB circa 01 Jan 1961 - // DOB circa 1934 - // DOB circa 1979-1982 - - t, _ := time.Parse("02 Jan 2006", in.value) + t, err := parseTime(dobPatterns, in.value) + if t.IsZero() || err != nil { + return nil + } return &t }) @@ -78,50 +71,46 @@ func ToEntity(sdn SDN) search.Entity[SDN] { case "vessel": out.Type = search.EntityVessel out.Vessel = &search.Vessel{ - Name: sdn.SDNName, - - // IMONumber string `json:"imoNumber"` - // Type VesselType `json:"type"` - // Flag string `json:"flag"` // ISO-3166 + Name: sdn.SDNName, + IMONumber: firstValue(findMatchingRemarks(remarks, "IMO")), + Type: withFirstF(findMatchingRemarks(remarks, "Vessel Type"), func(r remark) search.VesselType { + return search.VesselType(r.value) // TODO(adam): OFAC values are not an enum + }), + Flag: firstValue(findMatchingRemarks(remarks, "Flag")), // TODO(adam): ISO-3166 // Built *time.Time `json:"built"` // Model string `json:"model"` // Tonnage int `json:"tonnage"` // TODO(adam): remove , and ParseInt - // MMSI string `json:"mmsi"` // Maritime Mobile Service Identity + MMSI: firstValue(findMatchingRemarks(remarks, "MMSI")), } - // TODO(adam): - // Vessel Registration Identification IMO 9569712; - // MMSI 572469210; - // - // Former Vessel Flag None Identified; alt. 
Former Vessel Flag Tanzania; - case "aircraft": out.Type = search.EntityAircraft out.Aircraft = &search.Aircraft{ Name: sdn.SDNName, - // Type AircraftType `json:"type"` - // Flag string `json:"flag"` // ISO-3166 - // Built *time.Time `json:"built"` + Flag: firstValue(findMatchingRemarks(remarks, "Flag")), // TODO(adam): ISO-3166 + Built: withFirstP(findMatchingRemarks(remarks, "Manufacture Date"), func(in remark) *time.Time { + t, err := parseTime(dobPatterns, in.value) + if t.IsZero() || err != nil { + return nil + } + return &t + }), // ICAOCode string `json:"icaoCode"` // ICAO aircraft type designator - // Model string `json:"model"` - // SerialNumber string `json:"serialNumber"` + Model: firstValue(findMatchingRemarks(remarks, "Aircraft Model")), + SerialNumber: withFirstF(findMatchingRemarks(remarks, "Serial Number"), func(r remark) string { + // Trim parens from these remarks + // e.g. "Aircraft Manufacturer's Serial Number (MSN) 1023409321;" + idx := strings.Index(r.value, ")") + if idx > -1 && len(r.value) > idx+1 { + r.value = strings.TrimSpace(r.value[idx+1:]) + } + return r.value + }), } // TODO(adam): - // Aircraft Construction Number (also called L/N or S/N or F/N) 10907; - // - // Aircraft Manufacture Date 1992; - // Aircraft Manufacture Date 01 Dec 1981; - // Aircraft Manufacture Date Apr 1993; - // - // Aircraft Model IL76-TD; - // Aircraft Model B.747-422 - // Aircraft Model Gulfstream 200 - // // Aircraft Operator YAS AIR; - // Aircraft Manufacturer's Serial Number (MSN) 1023409321; - // // Previous Aircraft Tail Number 2-WGLP } @@ -159,6 +148,42 @@ func makeIdentifiers(remarks []string, needles []string) []search.Identifier { return out } +var ( + dobPatterns = []string{ + "02 Jan 2006", // 01 Apr 1950 + "Jan 2006", // Sep 1958 + "2006", // 1928 + } +) + +func parseTime(acceptedLayouts []string, value string) (time.Time, error) { + // We don't currently support ranges for birth dates, so take the first date provided + // Examples include: + // 
01 Feb 1958 to 28 Feb 1958 + // circa 1934 + // circa 1979-1982 + value = strings.TrimSpace(strings.ReplaceAll(value, "circa", "")) + + parts := strings.Split(value, "to") + if len(parts) > 1 { + value = parts[0] + } else { + parts = strings.Split(value, "-") + if len(parts) > 1 { + value = parts[0] + } + } + value = strings.TrimSpace(value) + + for i := range acceptedLayouts { + tt, err := time.Parse(acceptedLayouts[i], value) + if !tt.IsZero() && err == nil { + return tt, nil + } + } + return time.Time{}, nil +} + // TODO(adam): // Drop "alt. " diff --git a/pkg/ofac/mapper_test.go b/pkg/ofac/mapper_test.go index c1cbf054..6e23d02c 100644 --- a/pkg/ofac/mapper_test.go +++ b/pkg/ofac/mapper_test.go @@ -10,7 +10,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestMapper(t *testing.T) { +func TestMapper__Person(t *testing.T) { res, err := Read(filepath.Join("..", "..", "test", "testdata", "sdn.csv")) require.NoError(t, err) @@ -41,3 +41,93 @@ func TestMapper(t *testing.T) { require.Equal(t, "15102", e.SourceData.EntityID) } + +func TestMapper__Vessel(t *testing.T) { + res, err := Read(filepath.Join("..", "..", "test", "testdata", "sdn.csv")) + require.NoError(t, err) + + var sdn *SDN + for i := range res.SDNs { + if res.SDNs[i].EntityID == "15036" { + sdn = res.SDNs[i] + } + } + require.NotNil(t, sdn) + + e := ToEntity(*sdn) + require.Equal(t, "ARTAVIL", e.Name) + require.Equal(t, search.EntityVessel, e.Type) + require.Equal(t, search.SourceUSOFAC, e.Source) + + require.Nil(t, e.Person) + require.Nil(t, e.Business) + require.Nil(t, e.Organization) + require.Nil(t, e.Aircraft) + require.NotNil(t, e.Vessel) + + require.Equal(t, "ARTAVIL", e.Vessel.Name) + require.Equal(t, "Malta", e.Vessel.Flag) + require.Equal(t, "9187629", e.Vessel.IMONumber) + require.Equal(t, "572469210", e.Vessel.MMSI) + + require.Equal(t, "15036", e.SourceData.EntityID) +} + +func TestMapper__Aircraft(t *testing.T) { + res, err := Read(filepath.Join("..", "..", "test", "testdata", 
"sdn.csv")) + require.NoError(t, err) + + var sdn *SDN + for i := range res.SDNs { + if res.SDNs[i].EntityID == "18158" { + sdn = res.SDNs[i] + } + } + require.NotNil(t, sdn) + + e := ToEntity(*sdn) + require.Equal(t, "MSN 550", e.Name) + require.Equal(t, search.EntityAircraft, e.Type) + require.Equal(t, search.SourceUSOFAC, e.Source) + + require.Nil(t, e.Person) + require.Nil(t, e.Business) + require.Nil(t, e.Organization) + require.NotNil(t, e.Aircraft) + require.Nil(t, e.Vessel) + + require.Equal(t, "MSN 550", e.Aircraft.Name) + require.Equal(t, "1995-01-01", e.Aircraft.Built.Format(time.DateOnly)) + require.Equal(t, "Airbus A321-131", e.Aircraft.Model) + require.Equal(t, "550", e.Aircraft.SerialNumber) + + require.Equal(t, "18158", e.SourceData.EntityID) +} + +func TestParseTime(t *testing.T) { + t.Run("DOB", func(t *testing.T) { + tt, _ := parseTime(dobPatterns, "01 Apr 1950") + require.Equal(t, "1950-04-01", tt.Format(time.DateOnly)) + + tt, _ = parseTime(dobPatterns, "01 Feb 1958 to 28 Feb 1958") + require.Equal(t, "1958-02-01", tt.Format(time.DateOnly)) + + tt, _ = parseTime(dobPatterns, "1928") + require.Equal(t, "1928-01-01", tt.Format(time.DateOnly)) + + tt, _ = parseTime(dobPatterns, "1928 to 1930") + require.Equal(t, "1928-01-01", tt.Format(time.DateOnly)) + + tt, _ = parseTime(dobPatterns, "Sep 1958") + require.Equal(t, "1958-09-01", tt.Format(time.DateOnly)) + + tt, _ = parseTime(dobPatterns, "circa 01 Jan 1961") + require.Equal(t, "1961-01-01", tt.Format(time.DateOnly)) + + tt, _ = parseTime(dobPatterns, "circa 1934") + require.Equal(t, "1934-01-01", tt.Format(time.DateOnly)) + + tt, _ = parseTime(dobPatterns, "circa 1979-1982") + require.Equal(t, "1979-01-01", tt.Format(time.DateOnly)) + }) +} From 1692849f4a14b3044713231fb9dd852e0ba2bdd5 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Tue, 12 Mar 2024 08:27:12 -0500 Subject: [PATCH 05/26] feat: initial search endpoint --- internal/search/api_search.go | 41 +++++++++++++++++++++++++++++++++++ 1 
file changed, 41 insertions(+) create mode 100644 internal/search/api_search.go diff --git a/internal/search/api_search.go b/internal/search/api_search.go new file mode 100644 index 00000000..0bf34117 --- /dev/null +++ b/internal/search/api_search.go @@ -0,0 +1,41 @@ +package search + +import ( + "net/http" + + "github.com/moov-io/base/log" + + "github.com/gorilla/mux" +) + +// GET /v2/search + +type Controller interface { + AppendRoutes(router *mux.Router) *mux.Router +} + +func NewController(logger log.Logger, service Service) Controller { + return &controller{ + logger: logger, + service: service, + } +} + +type controller struct { + logger log.Logger + service Service +} + +func (c *controller) AppendRoutes(router *mux.Router) *mux.Router { + router. + Name("Search.v2"). + Methods("GET"). + Path("/v2/search"). + HandlerFunc(c.search) + + return router +} + +func (c *controller) search(w http.ResponseWriter, r *http.Request) { + // TODO(adam): +} From 432fd2339d47aec7ca30a5ff01ffe8c1c275a53e Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Tue, 12 Mar 2024 08:27:33 -0500 Subject: [PATCH 06/26] search: add SourceID (to Entity) and AltNames (to Person) --- pkg/search/models.go | 8 +++++--- pkg/search/models_test.go | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pkg/search/models.go b/pkg/search/models.go index 89a65960..861e8673 100644 --- a/pkg/search/models.go +++ b/pkg/search/models.go @@ -3,9 +3,10 @@ package search import "time" type Entity[T any] struct { - Name string `json:"name"` - Type EntityType `json:"entityType"` - Source SourceList `json:"sourceList"` + Name string `json:"name"` + Type EntityType `json:"entityType"` + Source SourceList `json:"sourceList"` + SourceID string `json:"sourceID"` // TODO(adam): // TODO(adam): What has opensanctions done to normalize and join this data // Review https://www.opensanctions.org/reference/ @@ -44,6 +45,7 @@ var ( type Person struct { Name string `json:"name"` + AltNames []string 
`json:"altNames"` Gender Gender `json:"gender"` BirthDate *time.Time `json:"birthDate"` DeathDate *time.Time `json:"deathDate"` diff --git a/pkg/search/models_test.go b/pkg/search/models_test.go index a5e6a9df..f6a0ca5f 100644 --- a/pkg/search/models_test.go +++ b/pkg/search/models_test.go @@ -23,6 +23,7 @@ func TestEntityJSON(t *testing.T) { "name": "", "entityType": "", "sourceList": "", + "sourceID": "", "person": null, "business": null, "organization": null, From b05cb478e32d4bb943a94ecc07e4fe4f53309369 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Tue, 12 Mar 2024 08:28:03 -0500 Subject: [PATCH 07/26] feat: start on EU and US CSL mappers --- pkg/csl_eu/mapper.go | 42 ++++++++++ pkg/csl_us/mapper.go | 186 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 228 insertions(+) create mode 100644 pkg/csl_eu/mapper.go create mode 100644 pkg/csl_us/mapper.go diff --git a/pkg/csl_eu/mapper.go b/pkg/csl_eu/mapper.go new file mode 100644 index 00000000..6688b778 --- /dev/null +++ b/pkg/csl_eu/mapper.go @@ -0,0 +1,42 @@ +package csl + +import ( + "strings" + "time" + + "github.com/moov-io/watchman/pkg/csl" + "github.com/moov-io/watchman/pkg/search" +) + +func PtrToEntity(record *csl.EUCSLRecord) search.Entity[csl.EUCSLRecord] { + if record != nil { + return ToEntity(*record) + } + return search.Entity[csl.EUCSLRecord]{} +} + +func ToEntity(record csl.EUCSLRecord) search.Entity[csl.EUCSLRecord] { + out := search.Entity[csl.EUCSLRecord]{ + Source: search.SourceEUCSL, + SourceData: record, + } + + if strings.EqualFold(record.EntitySubjectType, "person") { + out.Type = search.EntityPerson + out.Person = &search.Person{} + + if len(record.NameAliasWholeNames) > 0 { + out.Name = record.NameAliasWholeNames[0] + out.Person.Name = record.NameAliasWholeNames[0] + out.Person.AltNames = record.NameAliasWholeNames[1:] + } + if len(record.BirthDates) > 0 { + tt, err := time.Parse("2006-01-02", record.BirthDates[0]) + if err == nil { + out.Person.BirthDate = &tt + } + } + 
} + + return out +} diff --git a/pkg/csl_us/mapper.go b/pkg/csl_us/mapper.go new file mode 100644 index 00000000..da3768ef --- /dev/null +++ b/pkg/csl_us/mapper.go @@ -0,0 +1,186 @@ +package mapper + +import ( + "github.com/moov-io/watchman/pkg/csl" + "github.com/moov-io/watchman/pkg/search" +) + +// Entity List – Bureau of Industry and Security +func EL_ToEntity(record csl.EL) search.Entity[csl.EL] { + out := search.Entity[csl.EL]{ + Source: search.SourceUSCSL, + SourceData: record, + } + + out.Name = record.Name + // out.Type = // TODO(adam): + + // record.AlternateNames []string // TODO(adam): + // record.Addresses []string // TODO(adam): + + return out +} + +// Military End User List +func MEU_ToEntity(record csl.MEU) search.Entity[csl.MEU] { + out := search.Entity[csl.MEU]{ + Source: search.SourceUSCSL, + SourceData: record, + } + + out.Name = record.Name + + // Name string `json:"name"` + // Addresses string `json:"addresses"` + + return out +} + +// Sectoral Sanctions Identifications List (SSI) - Treasury Department +func SSI_ToEntity(record csl.SSI) search.Entity[csl.SSI] { + out := search.Entity[csl.SSI]{ + Source: search.SourceUSCSL, + SourceData: record, + } + + out.Name = record.Name + + // Addresses []string `json:"addresses"` + // Remarks []string `json:"remarks"` + // AlternateNames []string `json:"alternateNames"` + + // IDsOnRecord []string `json:"ids"` + + return out +} + +// Unverified List – Bureau of Industry and Security +func UVL_ToEntity(record csl.UVL) search.Entity[csl.UVL] { + out := search.Entity[csl.UVL]{ + Source: search.SourceUSCSL, + SourceData: record, + } + + out.Name = record.Name + // Addresses []string `json:"addresses"` + + return out +} + +// Foreign Sanctions Evaders (FSE) - Treasury Department +func FSE_ToEntity(record csl.FSE) search.Entity[csl.FSE] { + out := search.Entity[csl.FSE]{ + Source: search.SourceUSCSL, + SourceData: record, + } + + out.Name = record.Name + + // Type string `json:"type"` + // Addresses []string 
`json:"addresses,omitempty"` + // DatesOfBirth string `json:"datesOfBirth"` + // IDs []string `json:"IDs"` + + return out +} + +// Nonproliferation Sanctions (ISN) - State Department +func ISN_ToEntity(record csl.ISN) search.Entity[csl.ISN] { + out := search.Entity[csl.ISN]{ + Source: search.SourceUSCSL, + SourceData: record, + } + + out.Name = record.Name + + // Remarks []string `json:"remarks,omitempty"` + // AlternateNames []string `json:"alternateNames,omitempty"` + + return out +} + +// Palestinian Legislative Council List (PLC) - Treasury Department +func PLC_ToEntity(record csl.PLC) search.Entity[csl.PLC] { + out := search.Entity[csl.PLC]{ + Source: search.SourceUSCSL, + SourceData: record, + } + + out.Name = record.Name + + // Type string `json:"type"` + // Addresses []string `json:"addresses,omitempty"` + // DatesOfBirth string `json:"datesOfBirth"` + // IDs []string `json:"IDs"` + // Remarks []string `json:"remarks,omitempty"` + + return out +} + +// CAPTA (formerly Foreign Financial Institutions Subject to Part 561 - Treasury Department) +func CAP_ToEntity(record csl.CAP) search.Entity[csl.CAP] { + out := search.Entity[csl.CAP]{ + Source: search.SourceUSCSL, + SourceData: record, + } + + out.Name = record.Name + + // Type string `json:"type"` + // Addresses []string `json:"addresses,omitempty"` + // DatesOfBirth string `json:"datesOfBirth"` + // IDs []string `json:"IDs"` + // Remarks []string `json:"remarks,omitempty"` + + return out +} + +// ITAR Debarred (DTC) - State Department +func DTC_ToEntity(record csl.DTC) search.Entity[csl.DTC] { + out := search.Entity[csl.DTC]{ + Source: search.SourceUSCSL, + SourceData: record, + } + + out.Name = record.Name + + // AlternateNames []string `json:"alternateNames,omitempty"` + + return out +} + +// Non-SDN Chinese Military-Industrial Complex Companies List (CMIC) - Treasury Department +func CMIC_ToEntity(record csl.CMIC) search.Entity[csl.CMIC] { + out := search.Entity[csl.CMIC]{ + Source: search.SourceUSCSL, + 
SourceData: record, + } + + out.Name = record.Name + + // Type string `json:"type"` + // Addresses []string `json:"addresses,omitempty"` + // DatesOfBirth string `json:"datesOfBirth"` + // IDs []string `json:"IDs"` + // Remarks []string `json:"remarks,omitempty"` + + return out +} + +// Non-SDN Menu-Based Sanctions List (NS-MBS List) - Treasury Department +func NS_MBS_ToEntity(record csl.NS_MBS) search.Entity[csl.NS_MBS] { + out := search.Entity[csl.NS_MBS]{ + Source: search.SourceUSCSL, + SourceData: record, + } + + out.Name = record.Name + + // Type string `json:"type"` + // Addresses []string `json:"addresses,omitempty"` + // DatesOfBirth string `json:"datesOfBirth"` + // IDs []string `json:"IDs"` + // Remarks []string `json:"remarks,omitempty"` + + return out +} From 1bbd9ac981684a2ff50b399844e6bee956f4221b Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Tue, 17 Dec 2024 14:50:48 -0600 Subject: [PATCH 08/26] meta: fix compile --- internal/search/service.go | 5 +++++ pkg/ofac/mapper_test.go | 23 ++++++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 internal/search/service.go diff --git a/internal/search/service.go b/internal/search/service.go new file mode 100644 index 00000000..25b13347 --- /dev/null +++ b/internal/search/service.go @@ -0,0 +1,5 @@ +package search + +type Service interface { + // TODO(adam): +} diff --git a/pkg/ofac/mapper_test.go b/pkg/ofac/mapper_test.go index 6e23d02c..57122aad 100644 --- a/pkg/ofac/mapper_test.go +++ b/pkg/ofac/mapper_test.go @@ -1,6 +1,8 @@ package ofac import ( + "io" + "os" "path/filepath" "testing" "time" @@ -10,8 +12,23 @@ import ( "github.com/stretchr/testify/require" ) +func testInputs(tb testing.TB, paths ...string) map[string]io.ReadCloser { + tb.Helper() + + input := make(map[string]io.ReadCloser) + for _, path := range paths { + _, filename := filepath.Split(path) + + fd, err := os.Open(path) + require.NoError(tb, err) + + input[filename] = fd + } + return input +} + func 
TestMapper__Person(t *testing.T) { - res, err := Read(filepath.Join("..", "..", "test", "testdata", "sdn.csv")) + res, err := Read(testInputs(t, filepath.Join("..", "..", "test", "testdata", "sdn.csv"))) require.NoError(t, err) var sdn *SDN @@ -43,7 +60,7 @@ func TestMapper__Person(t *testing.T) { } func TestMapper__Vessel(t *testing.T) { - res, err := Read(filepath.Join("..", "..", "test", "testdata", "sdn.csv")) + res, err := Read(testInputs(t, filepath.Join("..", "..", "test", "testdata", "sdn.csv"))) require.NoError(t, err) var sdn *SDN @@ -74,7 +91,7 @@ func TestMapper__Vessel(t *testing.T) { } func TestMapper__Aircraft(t *testing.T) { - res, err := Read(filepath.Join("..", "..", "test", "testdata", "sdn.csv")) + res, err := Read(testInputs(t, filepath.Join("..", "..", "test", "testdata", "sdn.csv"))) require.NoError(t, err) var sdn *SDN From 62bf69b1c56f6855d503734fea43a53df62cac6e Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Tue, 17 Dec 2024 15:11:05 -0600 Subject: [PATCH 09/26] ofac: finish implementing mapper --- pkg/ofac/mapper.go | 504 +++++++++++++++++++++++++------ pkg/ofac/mapper_business_test.go | 78 +++++ pkg/ofac/mapper_person_test.go | 173 +++++++++++ pkg/ofac/mapper_test.go | 398 ++++++++++++++++++------ pkg/ofac/mapper_vehicles_test.go | 196 ++++++++++++ pkg/search/models.go | 59 +++- 6 files changed, 1218 insertions(+), 190 deletions(-) create mode 100644 pkg/ofac/mapper_business_test.go create mode 100644 pkg/ofac/mapper_person_test.go create mode 100644 pkg/ofac/mapper_vehicles_test.go diff --git a/pkg/ofac/mapper.go b/pkg/ofac/mapper.go index 01e4014b..f8468007 100644 --- a/pkg/ofac/mapper.go +++ b/pkg/ofac/mapper.go @@ -2,20 +2,108 @@ package ofac import ( "regexp" + "strconv" "strings" "time" "github.com/moov-io/watchman/pkg/search" ) -func PtrToEntity(sdn *SDN) search.Entity[SDN] { - if sdn != nil { - return ToEntity(*sdn) +// Regular expressions for parsing various fields +var ( + akaRegex = 
regexp.MustCompile(`(?i)a\.k\.a\.\s+'([^']+)'`) + // Matches both "citizen Venezuela" and "nationality: Russia" + citizenshipRegex = regexp.MustCompile(`(?i)(citizen|nationality)[:\s]+([^;,]+)`) + // Matches both "POB Baghdad, Iraq" and "Alt. POB: Keren Eritrea" + pobRegex = regexp.MustCompile(`(?i)(?:Alt\.)?\s*POB:?\s+([^;]+)`) + // Contact information patterns + emailRegex = regexp.MustCompile(`(?i)(?:Email|EMAIL)[:\s]+([^;]+)`) + phoneRegex = regexp.MustCompile(`(?i)(?:Telephone|Phone|PHONE)[:\s]+([^;]+)`) + faxRegex = regexp.MustCompile(`(?i)Fax[:\s]+([^;]+)`) + // Website patterns + websiteRegex = regexp.MustCompile(`(?i)(?:Website|http)[:\s]+([^;\s]+)`) + // Country extraction pattern + countryParenRegex = regexp.MustCompile(`\(([\w\s]+)\)`) +) + +var ( + dobPatterns = []string{ + "02 Jan 2006", // 01 Apr 1950 + "Jan 2006", // Sep 1958 + "2006", // 1928 } - return search.Entity[SDN]{} +) + +func makeIdentifiers(remarks []string, needles []string) []search.Identifier { + seen := make(map[string]bool) + var out []search.Identifier + + for i := range needles { + if id := makeIdentifier(remarks, needles[i]); id != nil { + // Create unique key from name and country + key := id.Name + "|" + id.Country + if !seen[key] { + seen[key] = true + out = append(out, *id) + } + } + } + return out } -// TODO(adam): Accept Addresses, Alts, Comments +func makeIdentifier(remarks []string, suffix string) *search.Identifier { + found := findMatchingRemarks(remarks, suffix) + if len(found) == 0 { + return nil + } + + // Often the country is in parenthesis at the end, so let's look for that + // Example: Business Number 51566843 (Hong Kong) + country := "" + value := found[0].value + + if matches := countryParenRegex.FindStringSubmatch(value); len(matches) > 1 { + country = matches[1] + // Remove the country part from the value + value = strings.TrimSpace(countryParenRegex.ReplaceAllString(value, "")) + } + + return &search.Identifier{ + Name: strings.TrimSpace(found[0].fullName), + 
Country: country, + Identifier: value, + } +} + +func parseTime(acceptedLayouts []string, value string) (time.Time, error) { + value = strings.TrimSpace(strings.ReplaceAll(value, "circa", "")) + parts := strings.Split(value, "to") + if len(parts) > 1 { + value = parts[0] + } else { + parts = strings.Split(value, "-") + if len(parts) > 1 { + value = parts[0] + } + } + value = strings.TrimSpace(value) + + for i := range acceptedLayouts { + tt, err := time.Parse(acceptedLayouts[i], value) + if !tt.IsZero() && err == nil { + return tt, nil + } + } + return time.Time{}, nil +} + +func extractCountry(remark string) string { + matches := countryParenRegex.FindStringSubmatch(remark) + if len(matches) > 1 { + return matches[1] + } + return "" +} func ToEntity(sdn SDN) search.Entity[SDN] { out := search.Entity[SDN]{ @@ -25,11 +113,20 @@ func ToEntity(sdn SDN) search.Entity[SDN] { } remarks := splitRemarks(sdn.Remarks) + affiliations, sanctionsInfo, historicalInfo, titles := parseRemarks(remarks) + + out.Affiliations = affiliations + out.SanctionsInfo = sanctionsInfo + out.HistoricalInfo = historicalInfo + out.Titles = titles + out.CryptoAddresses = parseCryptoAddresses(remarks) + + // Extract common fields regardless of entity type + out.Addresses = parseAddresses(remarks) switch strings.ToLower(strings.TrimSpace(sdn.SDNType)) { case "-0-", "": out.Type = search.EntityBusiness - // Set properties out.Business = &search.Business{ Name: sdn.SDNName, } @@ -42,6 +139,11 @@ func ToEntity(sdn SDN) search.Entity[SDN] { "Chamber of Commerce Number", "Chinese Commercial Code", "Registered Charity No.", + "Commercial Registry Number", + "Company Number", + "Enterprise Number", + "Legal Entity Number", + "Registration Number", }) case "individual": @@ -50,6 +152,17 @@ func ToEntity(sdn SDN) search.Entity[SDN] { Name: sdn.SDNName, Gender: search.Gender(strings.ToLower(firstValue(findMatchingRemarks(remarks, "Gender")))), } + + // Title from SDN field needs to be prepended if non-empty + 
if sdn.Title != "" { + titles = append([]string{sdn.Title}, titles...) + } + out.Titles = titles + + // Extract alternative names + out.Person.AltNames = parseAltNames(remarks) + + // Handle birth date out.Person.BirthDate = withFirstP(findMatchingRemarks(remarks, "DOB"), func(in remark) *time.Time { t, err := parseTime(dobPatterns, in.value) if t.IsZero() || err != nil { @@ -58,15 +171,8 @@ func ToEntity(sdn SDN) search.Entity[SDN] { return &t }) - // TODO(adam): - // citizen Venezuela - // - // nationality Russia; - // nationality: Eritrean - // - // POB 'Adlun, Lebanon - // Alt. POB: Keren Eritrea - // POB Abadan, Iran + // Parse government IDs + out.Person.GovernmentIDs = parseGovernmentIDs(remarks) case "vessel": out.Type = search.EntityVessel @@ -74,21 +180,22 @@ func ToEntity(sdn SDN) search.Entity[SDN] { Name: sdn.SDNName, IMONumber: firstValue(findMatchingRemarks(remarks, "IMO")), Type: withFirstF(findMatchingRemarks(remarks, "Vessel Type"), func(r remark) search.VesselType { - return search.VesselType(r.value) // TODO(adam): OFAC values are not an enum + return normalizeVesselType(r.value) }), - Flag: firstValue(findMatchingRemarks(remarks, "Flag")), // TODO(adam): ISO-3166 - // Built *time.Time `json:"built"` - // Model string `json:"model"` - // Tonnage int `json:"tonnage"` // TODO(adam): remove , and ParseInt - MMSI: firstValue(findMatchingRemarks(remarks, "MMSI")), + Flag: normalizeCountryCode(firstValue(findMatchingRemarks(remarks, "Flag"))), + MMSI: firstValue(findMatchingRemarks(remarks, "MMSI")), + Tonnage: parseTonnage(firstValue(findMatchingRemarks(remarks, "Tonnage"))), + CallSign: sdn.CallSign, + GrossRegisteredTonnage: parseTonnage(sdn.GrossRegisteredTonnage), + Owner: sdn.VesselOwner, } case "aircraft": out.Type = search.EntityAircraft out.Aircraft = &search.Aircraft{ Name: sdn.SDNName, - // Type AircraftType `json:"type"` - Flag: firstValue(findMatchingRemarks(remarks, "Flag")), // TODO(adam): ISO-3166 + Type: 
normalizeAircraftType(firstValue(findMatchingRemarks(remarks, "Aircraft Type"))), + Flag: normalizeCountryCode(firstValue(findMatchingRemarks(remarks, "Flag"))), Built: withFirstP(findMatchingRemarks(remarks, "Manufacture Date"), func(in remark) *time.Time { t, err := parseTime(dobPatterns, in.value) if t.IsZero() || err != nil { @@ -96,96 +203,321 @@ func ToEntity(sdn SDN) search.Entity[SDN] { } return &t }), - // ICAOCode string `json:"icaoCode"` // ICAO aircraft type designator - Model: firstValue(findMatchingRemarks(remarks, "Aircraft Model")), - SerialNumber: withFirstF(findMatchingRemarks(remarks, "Serial Number"), func(r remark) string { - // Trim parens from these remarks - // e.g. "Aircraft Manufacturer's Serial Number (MSN) 1023409321;" - idx := strings.Index(r.value, ")") - if idx > -1 && len(r.value) > idx+1 { - r.value = strings.TrimSpace(r.value[idx+1:]) - } - return r.value - }), + Model: firstValue(findMatchingRemarks(remarks, "Aircraft Model")), + SerialNumber: parseSerialNumber(remarks), + ICAOCode: firstValue(findMatchingRemarks(remarks, "ICAO Code")), } - - // TODO(adam): - // Aircraft Operator YAS AIR; - // Previous Aircraft Tail Number 2-WGLP } return out } -var parenCountryRegex = regexp.MustCompile(`\(([\w\s]+)\)`) +func parseAltNames(remarks []string) []string { + var names []string + for _, r := range remarks { + matches := akaRegex.FindAllStringSubmatch(r, -1) + for _, m := range matches { + if len(m) > 1 { + name := strings.Trim(m[1], "'") + names = append(names, name) + } + } + } + return names +} + +var ( + // Passports + governmentIDPassportRegex = regexp.MustCompile(`(?i)Passport\s+(?:#|No\.|Number)?\s*([A-Z0-9]+)`) + governmentIDDiplomaticPassRegex = regexp.MustCompile(`(?i)Diplomatic\s+Passport\s+([A-Z0-9]+)`) -func makeIdentifier(remarks []string, suffix string) *search.Identifier { - found := findMatchingRemarks(remarks, suffix) - if len(found) == 0 { - return nil + // Drivers Licenses + governmentIDDriversLicenseRegex = 
regexp.MustCompile(`(?i)Driver'?s?\s+License\s+(?:No\.|Number)?\s*(?:[A-Z]-)?([A-Z0-9]+)`) + + // National IDs + governmentIDNationalRegex = regexp.MustCompile(`(?i)National\s+ID\s+(?:No\.|Number)?\s*([A-Z0-9-]+)`) + governmentIDPersonalIDRegex = regexp.MustCompile(`(?i)Personal\s+ID\s+(?:Card)?\s*(?:No\.|Number)?\s*([A-Z0-9-]+)`) + + // Tax IDs + governmentIDTaxRegex = regexp.MustCompile(`(?i)Tax\s+ID\s+(?:No\.|Number)?\s*([A-Z0-9-]+)`) + governmentIDCUITRegex = regexp.MustCompile(`(?i)C\.?U\.?I\.?T\.?\s+(?:No\.|Number)?\s*([A-Z0-9-]+)`) + + // Social Security Numbers + governmentIDSSNRegex = regexp.MustCompile(`(?i)SSN\s+([0-9-]+)`) + + // Latin American IDs + governmentIDCedulaRegex = regexp.MustCompile(`(?i)Cedula\s+(?:No\.|Number)?\s*([A-Z0-9-]+)`) + governmentIDCURPRegex = regexp.MustCompile(`(?i)C\.?U\.?R\.?P\.?\s+(?:#|No\.|Number)?\s*([A-Z0-9-]+)`) + + // Electoral IDs + governmentIDElectoralRegex = regexp.MustCompile(`(?i)Electoral\s+Registry\s+(?:No\.|Number)?\s*([A-Z0-9-]+)`) + + // Business Registration + governmentIDBusinessRegistrationRegex = regexp.MustCompile(`(?i)Business\s+Registration\s+(?:No\.|Number|Document)?\s*([A-Z0-9-]+)`) + governmentIDCommercialRegistryRegex = regexp.MustCompile(`(?i)Commercial\s+Registry\s+(?:No\.|Number)?\s*([A-Z0-9-./]+)`) + + // Birth Certificates + governmentIDBirthCertRegex = regexp.MustCompile(`(?i)Birth\s+Certificate\s+(?:No\.|Number)?\s*([A-Z0-9]+)`) + + // Refugee Documents + governmentIDRefugeeRegex = regexp.MustCompile(`(?i)Refugee\s+ID\s+(?:Card)?\s*([A-Z0-9]+)`) +) + +func parseGovernmentIDs(remarks []string) []search.GovernmentID { + var ids []search.GovernmentID + + // Map of regex patterns to GovernmentIDType + idPatterns := map[*regexp.Regexp]search.GovernmentIDType{ + governmentIDPassportRegex: search.GovernmentIDPassport, + governmentIDDriversLicenseRegex: search.GovernmentIDDriversLicense, + governmentIDPassportRegex: search.GovernmentIDPassport, + governmentIDDiplomaticPassRegex: 
search.GovernmentIDDiplomaticPass, + governmentIDDriversLicenseRegex: search.GovernmentIDDriversLicense, + governmentIDNationalRegex: search.GovernmentIDNational, + governmentIDPersonalIDRegex: search.GovernmentIDPersonalID, + governmentIDTaxRegex: search.GovernmentIDTax, + governmentIDCUITRegex: search.GovernmentIDCUIT, + governmentIDSSNRegex: search.GovernmentIDSSN, + governmentIDCedulaRegex: search.GovernmentIDCedula, + governmentIDCURPRegex: search.GovernmentIDCURP, + governmentIDElectoralRegex: search.GovernmentIDElectoral, + governmentIDBusinessRegistrationRegex: search.GovernmentIDBusinessRegisration, + governmentIDCommercialRegistryRegex: search.GovernmentIDCommercialRegistry, + governmentIDBirthCertRegex: search.GovernmentIDBirthCert, + governmentIDRefugeeRegex: search.GovernmentIDRefugee, } - // Often the country is in parenthesis at the end, so let's look for that - // - // Business Number 51566843 (Hong Kong) - country := parenCountryRegex.FindString(found[0].value) - country = strings.TrimPrefix(strings.TrimSuffix(country, ")"), "(") + for _, r := range remarks { + for re, idType := range idPatterns { + if matches := re.FindStringSubmatch(r); len(matches) > 1 { + // Clean the identifier by removing trailing punctuation + identifier := strings.TrimRight(matches[1], ".;,") - return &search.Identifier{ - Name: found[0].fullName, - Country: country, // ISO-3166 // TODO(adam): - Identifier: found[0].value, + // Extract country and dates if present + country := extractCountry(r) + + // Some IDs have issued/expiry dates - we could add these to the GovernmentID struct + // issued := extractDate(r, "issued") + // expires := extractDate(r, "expires") + + ids = append(ids, search.GovernmentID{ + Type: idType, + Country: normalizeCountryCode(country), + Identifier: identifier, + }) + } + } } + + return ids } -func makeIdentifiers(remarks []string, needles []string) []search.Identifier { - var out []search.Identifier - for i := range needles { - if id := 
makeIdentifier(remarks, needles[i]); id != nil { - out = append(out, *id) +func normalizeCountryCode(country string) string { + // TODO: Implement conversion to ISO-3166 + return strings.TrimSpace(country) +} + +func normalizeVesselType(vesselType string) search.VesselType { + switch strings.ToLower(strings.TrimSpace(vesselType)) { + case "cargo": + return search.VesselTypeCargo + default: + return search.VesselTypeUnknown + } +} + +func normalizeAircraftType(aircraftType string) search.AircraftType { + switch strings.ToLower(strings.TrimSpace(aircraftType)) { + case "cargo": + return search.AircraftCargo + default: + return search.AircraftTypeUnknown + } +} + +func parseTonnage(value string) int { + // Remove commas and convert to int + value = strings.ReplaceAll(value, ",", "") + tonnage, _ := strconv.Atoi(value) + return tonnage +} + +func parseSerialNumber(remarks []string) string { + for _, r := range findMatchingRemarks(remarks, "Serial Number") { + // Remove parenthetical content and clean + idx := strings.Index(r.value, ")") + if idx > -1 && len(r.value) > idx+1 { + return strings.TrimSpace(r.value[idx+1:]) } + return strings.TrimSpace(r.value) } - return out + return "" +} + +func parseAddresses(remarks []string) []search.Address { + // TODO: Implement address parsing + return nil } var ( - dobPatterns = []string{ - "02 Jan 2006", // 01 Apr 1950 - "Jan 2006", // Sep 1958 - "2006", // 1928 - } + // Regular expressions for parsing relationships and sanctions + linkedToRegex = regexp.MustCompile(`(?i)Linked\s+To:\s+([^;]+)`) + subsidiaryRegex = regexp.MustCompile(`(?i)Subsidiary\s+Of:\s+([^;]+)`) + ownedByRegex = regexp.MustCompile(`(?i)(?:Owned|Controlled)\s+By:\s+([^;]+)`) + sanctionsRegex = regexp.MustCompile(`(?i)Additional\s+Sanctions\s+Information\s+-\s+([^;]+)`) + formerNameRegex = regexp.MustCompile(`(?i)(?:Former|Previous|f\.k\.a\.|p\.k\.a\.)\s+(?:Name|Vessel):\s+([^;]+)`) + titleRegex = regexp.MustCompile(`(?i)Title:\s+([^;]+)`) + + 
cryptoAddressRegex = regexp.MustCompile(`(?i)Digital\s+Currency\s+Address\s+-\s+([A-Z0-9]+)\s+([A-Z0-9]+)`) // Matches "Digital Currency Address - XBT 1234abc..." ) -func parseTime(acceptedLayouts []string, value string) (time.Time, error) { - // We don't currently support ranges for birth dates, so take the first date provided - // Examples include: - // 01 Feb 1958 to 28 Feb 1958 - // circa 1934 - // circa 1979-1982 - value = strings.TrimSpace(strings.ReplaceAll(value, "circa", "")) +func parseRemarks(remarks []string) ([]search.Affiliation, *search.SanctionsInfo, []search.HistoricalInfo, []string) { + var affiliations []search.Affiliation + var historicalInfo []search.HistoricalInfo + var titles []string + sanctionsInfo := &search.SanctionsInfo{} - parts := strings.Split(value, "to") - if len(parts) > 1 { - value = parts[0] - } else { - parts = strings.Split(value, "-") - if len(parts) > 1 { - value = parts[0] + for _, remark := range remarks { + // Parse affiliations + if matches := linkedToRegex.FindAllStringSubmatch(remark, -1); matches != nil { + for _, m := range matches { + affiliations = append(affiliations, search.Affiliation{ + EntityName: strings.TrimSpace(m[1]), + Type: "Linked To", + }) + } + } + + // Parse subsidiary relationships + if matches := subsidiaryRegex.FindAllStringSubmatch(remark, -1); matches != nil { + for _, m := range matches { + affiliations = append(affiliations, search.Affiliation{ + EntityName: strings.TrimSpace(m[1]), + Type: "Subsidiary Of", + }) + } + } + + // Parse owned/controlled by relationships + if matches := ownedByRegex.FindAllStringSubmatch(remark, -1); matches != nil { + for _, m := range matches { + affiliations = append(affiliations, search.Affiliation{ + EntityName: strings.TrimSpace(m[1]), + Type: "Subsidiary Of", + }) + } + } + + // Parse sanctions information + if matches := sanctionsRegex.FindStringSubmatch(remark); matches != nil { + info := strings.TrimSpace(matches[1]) + sanctionsInfo.Description = info + if 
strings.Contains(strings.ToLower(info), "secondary sanctions") { + sanctionsInfo.Secondary = true + } + } + + // Parse historical information + if matches := formerNameRegex.FindAllStringSubmatch(remark, -1); matches != nil { + for _, m := range matches { + historicalInfo = append(historicalInfo, search.HistoricalInfo{ + Type: "Former Name", + Value: strings.TrimSpace(m[1]), + }) + } + } + + // Parse titles + if matches := titleRegex.FindAllStringSubmatch(remark, -1); matches != nil { + for _, m := range matches { + titles = append(titles, strings.TrimSpace(m[1])) + } } } - value = strings.TrimSpace(value) - for i := range acceptedLayouts { - tt, err := time.Parse(acceptedLayouts[i], value) - if !tt.IsZero() && err == nil { - return tt, nil + return deduplicateAffiliations(affiliations), + sanitizeSanctionsInfo(sanctionsInfo), + deduplicateHistoricalInfo(historicalInfo), + deduplicateTitles(titles) +} + +func deduplicateAffiliations(affiliations []search.Affiliation) []search.Affiliation { + seen := make(map[string]bool) + var result []search.Affiliation + + for _, aff := range affiliations { + key := aff.Type + "|" + aff.EntityName + if !seen[key] { + seen[key] = true + result = append(result, aff) } } - return time.Time{}, nil + return result +} + +func sanitizeSanctionsInfo(info *search.SanctionsInfo) *search.SanctionsInfo { + if info == nil || (info.Description == "" && !info.Secondary && len(info.Programs) == 0) { + return nil + } + return info } -// TODO(adam): -// Drop "alt. 
" +func deduplicateHistoricalInfo(info []search.HistoricalInfo) []search.HistoricalInfo { + seen := make(map[string]bool) + var result []search.HistoricalInfo + + for _, hi := range info { + key := hi.Type + "|" + hi.Value + if !seen[key] { + seen[key] = true + result = append(result, hi) + } + } + return result +} + +func deduplicateTitles(titles []string) []string { + seen := make(map[string]bool) + var result []string + + for _, title := range titles { + if !seen[title] { + seen[title] = true + result = append(result, title) + } + } + return result +} + +func parseCryptoAddresses(remarks []string) []search.CryptoAddress { + var addresses []search.CryptoAddress + + for _, remark := range remarks { + matches := cryptoAddressRegex.FindAllStringSubmatch(remark, -1) + for _, m := range matches { + if len(m) > 2 { + addresses = append(addresses, search.CryptoAddress{ + Currency: strings.TrimSpace(m[1]), + Address: strings.TrimSpace(m[2]), + }) + } + } + } + + // Deduplicate addresses + seen := make(map[string]bool) + var unique []search.CryptoAddress + + for _, addr := range addresses { + key := addr.Currency + "|" + addr.Address + if !seen[key] { + seen[key] = true + unique = append(unique, addr) + } + } + + return unique +} // ContactInfo // Fax: 0097282858208. @@ -213,10 +545,8 @@ func parseTime(acceptedLayouts []string, value string) (time.Time, error) { // Website Oboronlogistika.ru // Website http://comitet.su/about/ // http://www.saraproperties.co.uk (website). - // a.k.a. 'ABU AHMAD ISHAB'. // a.k.a. 'ZAMANI, Aziz Shah' - // GovernmentIDs // // Cedula No. 
94428531 (Colombia) diff --git a/pkg/ofac/mapper_business_test.go b/pkg/ofac/mapper_business_test.go new file mode 100644 index 00000000..08e293bc --- /dev/null +++ b/pkg/ofac/mapper_business_test.go @@ -0,0 +1,78 @@ +package ofac + +import ( + "sort" + "testing" + + "github.com/moov-io/watchman/pkg/search" + + "github.com/stretchr/testify/require" +) + +func TestMapper__CompleteBusiness(t *testing.T) { + sdn := &SDN{ + EntityID: "12345", + SDNName: "ACME CORPORATION", + SDNType: "-0-", + Remarks: "Business Registration Number 51566843 (Hong Kong); Commercial Registry Number CH-020.1.066.499-9 (Switzerland); Company Number 05527424 (United Kingdom)", + } + + e := ToEntity(*sdn) + require.Equal(t, "ACME CORPORATION", e.Name) + require.Equal(t, search.EntityBusiness, e.Type) + require.Equal(t, search.SourceUSOFAC, e.Source) + + require.NotNil(t, e.Business) + require.Equal(t, "ACME CORPORATION", e.Business.Name) + require.Len(t, e.Business.Identifier, 3) + + // Sort the identifiers to ensure consistent ordering for tests + identifiers := e.Business.Identifier + sort.Slice(identifiers, func(i, j int) bool { + return identifiers[i].Country < identifiers[j].Country + }) + + // Verify identifiers + require.Equal(t, "Hong Kong", identifiers[0].Country) + require.Equal(t, "Business Registration Number", identifiers[0].Name) + require.Equal(t, "51566843", identifiers[0].Identifier) + + require.Equal(t, "Switzerland", identifiers[1].Country) + require.Equal(t, "Commercial Registry Number", identifiers[1].Name) + require.Equal(t, "CH-020.1.066.499-9", identifiers[1].Identifier) + + require.Equal(t, "United Kingdom", identifiers[2].Country) + require.Equal(t, "Company Number", identifiers[2].Name) + require.Equal(t, "05527424", identifiers[2].Identifier) + + // Verify other entity types are nil + require.Nil(t, e.Person) + require.Nil(t, e.Organization) + require.Nil(t, e.Aircraft) + require.Nil(t, e.Vessel) +} + +func TestMapper__CompleteBusinessWithRemarks(t *testing.T) { + 
sdn := &SDN{ + EntityID: "12345", + SDNName: "ACME CORPORATION", + SDNType: "-0-", + Remarks: "Business Registration Number 51566843 (Hong Kong); Subsidiary Of: PARENT CORP; Former Name: OLD ACME LTD; Additional Sanctions Information - Subject to Secondary Sanctions", + } + + e := ToEntity(*sdn) + + // Test affiliations + require.Len(t, e.Affiliations, 1) + require.Equal(t, "PARENT CORP", e.Affiliations[0].EntityName) + require.Equal(t, "Subsidiary Of", e.Affiliations[0].Type) + + // Test sanctions info + require.NotNil(t, e.SanctionsInfo) + require.True(t, e.SanctionsInfo.Secondary) + + // Test historical info + require.Len(t, e.HistoricalInfo, 1) + require.Equal(t, "Former Name", e.HistoricalInfo[0].Type) + require.Equal(t, "OLD ACME LTD", e.HistoricalInfo[0].Value) +} diff --git a/pkg/ofac/mapper_person_test.go b/pkg/ofac/mapper_person_test.go new file mode 100644 index 00000000..50fae703 --- /dev/null +++ b/pkg/ofac/mapper_person_test.go @@ -0,0 +1,173 @@ +package ofac + +import ( + "path/filepath" + "testing" + "time" + + "github.com/moov-io/watchman/pkg/search" + + "github.com/stretchr/testify/require" +) + +func TestMapper__Person(t *testing.T) { + res, err := Read(testInputs(t, filepath.Join("..", "..", "test", "testdata", "sdn.csv"))) + require.NoError(t, err) + + var sdn *SDN + for i := range res.SDNs { + if res.SDNs[i].EntityID == "15102" { + sdn = res.SDNs[i] + } + } + require.NotNil(t, sdn) + + e := ToEntity(*sdn) + require.Equal(t, "MORENO, Daniel", e.Name) + require.Equal(t, search.EntityPerson, e.Type) + require.Equal(t, search.SourceUSOFAC, e.Source) + + require.NotNil(t, e.Person) + require.Equal(t, "MORENO, Daniel", e.Person.Name) + require.Equal(t, "", string(e.Person.Gender)) + require.Equal(t, "1972-10-12T00:00:00Z", e.Person.BirthDate.Format(time.RFC3339)) + require.Nil(t, e.Person.DeathDate) + require.Len(t, e.Person.GovernmentIDs, 1) + + passport := e.Person.GovernmentIDs[0] + require.Equal(t, search.GovernmentIDPassport, passport.Type) + 
require.Equal(t, "Belize", passport.Country) + require.Equal(t, "0291622", passport.Identifier) + + require.Nil(t, e.Business) + require.Nil(t, e.Organization) + require.Nil(t, e.Aircraft) + require.Nil(t, e.Vessel) + + require.Equal(t, "15102", e.SourceData.EntityID) +} + +func TestMapper__CompletePerson(t *testing.T) { + sdn := &SDN{ + EntityID: "26057", + SDNName: "AL-ZAYDI, Shibl Muhsin 'Ubayd", + SDNType: "individual", + Remarks: "DOB 28 Oct 1968; POB Baghdad, Iraq; Additional Sanctions Information - Subject to Secondary Sanctions Pursuant to the Hizballah Financial Sanctions Regulations; alt. Additional Sanctions Information - Subject to Secondary Sanctions; Gender Male; a.k.a. 'SHIBL, Hajji'; nationality Iran; Passport A123456 (Iran) expires 2024; Driver's License No. 04900377 (Moldova) issued 02 Jul 2004; Email Address test@example.com; Phone: +1-123-456-7890; Fax: +1-123-456-7899", + } + + e := ToEntity(*sdn) + require.Equal(t, "AL-ZAYDI, Shibl Muhsin 'Ubayd", e.Name) + require.Equal(t, search.EntityPerson, e.Type) + require.Equal(t, search.SourceUSOFAC, e.Source) + + // Person specific fields + require.NotNil(t, e.Person) + require.Equal(t, "AL-ZAYDI, Shibl Muhsin 'Ubayd", e.Person.Name) + require.Equal(t, search.GenderMale, e.Person.Gender) + require.Equal(t, "1968-10-28T00:00:00Z", e.Person.BirthDate.Format(time.RFC3339)) + require.Nil(t, e.Person.DeathDate) + + // Test alt names + require.Len(t, e.Person.AltNames, 1) + require.Equal(t, "SHIBL, Hajji", e.Person.AltNames[0]) + + // Test government IDs + require.Len(t, e.Person.GovernmentIDs, 2) + var passport, license *search.GovernmentID + for i := range e.Person.GovernmentIDs { + if e.Person.GovernmentIDs[i].Type == search.GovernmentIDPassport { + passport = &e.Person.GovernmentIDs[i] + } else { + license = &e.Person.GovernmentIDs[i] + } + } + require.NotNil(t, passport) + require.Equal(t, "Iran", passport.Country) + require.Equal(t, "A123456", passport.Identifier) + + require.NotNil(t, license) + 
require.Equal(t, "Moldova", license.Country) + require.Equal(t, "04900377", license.Identifier) + + // Verify other entity types are nil + require.Nil(t, e.Business) + require.Nil(t, e.Organization) + require.Nil(t, e.Aircraft) + require.Nil(t, e.Vessel) +} + +func TestParseAltNames(t *testing.T) { + tests := []struct { + remarks []string + expected []string + }{ + { + remarks: []string{"a.k.a. 'SMITH, John'"}, + expected: []string{"SMITH, John"}, + }, + { + remarks: []string{"a.k.a. 'SMITH, John'; a.k.a. 'DOE, Jane'"}, + expected: []string{"SMITH, John", "DOE, Jane"}, + }, + { + remarks: []string{"Some other remark", "a.k.a. 'SMITH, John'"}, + expected: []string{"SMITH, John"}, + }, + { + remarks: []string{}, + expected: nil, + }, + } + + for _, tt := range tests { + result := parseAltNames(tt.remarks) + require.Equal(t, tt.expected, result) + } +} + +func TestMapper__CompletePersonWithRemarks(t *testing.T) { + sdn := &SDN{ + EntityID: "26057", + SDNName: "AL-ZAYDI, Shibl Muhsin 'Ubayd", + SDNType: "individual", + Remarks: "DOB 28 Oct 1968; POB Baghdad, Iraq; Gender Male; Title: Commander; Former Name: AL-ZAYDI, Muhammad; Linked To: ISLAMIC REVOLUTIONARY GUARD CORPS (IRGC)-QODS FORCE; Additional Sanctions Information - Subject to Secondary Sanctions", + } + + e := ToEntity(*sdn) + + // Test affiliations + require.Len(t, e.Affiliations, 1) + require.Equal(t, "ISLAMIC REVOLUTIONARY GUARD CORPS (IRGC)-QODS FORCE", e.Affiliations[0].EntityName) + require.Equal(t, "Linked To", e.Affiliations[0].Type) + + // Test sanctions info + require.NotNil(t, e.SanctionsInfo) + require.True(t, e.SanctionsInfo.Secondary) + require.Equal(t, "Subject to Secondary Sanctions", e.SanctionsInfo.Description) + + // Test historical info + require.Len(t, e.HistoricalInfo, 1) + require.Equal(t, "Former Name", e.HistoricalInfo[0].Type) + require.Equal(t, "AL-ZAYDI, Muhammad", e.HistoricalInfo[0].Value) + + // Test titles + require.Equal(t, []string{"Commander"}, e.Titles) +} + +func 
TestMapper__PersonWithTitle(t *testing.T) { + sdn := &SDN{ + EntityID: "12345", + SDNName: "SMITH, John", + SDNType: "individual", + Title: "Chief Financial Officer", + Remarks: "Title: Regional Director", + } + + e := ToEntity(*sdn) + require.Equal(t, "SMITH, John", e.Name) + require.Equal(t, search.EntityPerson, e.Type) + + // Should have both titles - from SDN field and remarks + require.Contains(t, e.Titles, "Chief Financial Officer") + require.Contains(t, e.Titles, "Regional Director") +} diff --git a/pkg/ofac/mapper_test.go b/pkg/ofac/mapper_test.go index 57122aad..721a9fa5 100644 --- a/pkg/ofac/mapper_test.go +++ b/pkg/ofac/mapper_test.go @@ -4,6 +4,7 @@ import ( "io" "os" "path/filepath" + "sort" "testing" "time" @@ -27,100 +28,6 @@ func testInputs(tb testing.TB, paths ...string) map[string]io.ReadCloser { return input } -func TestMapper__Person(t *testing.T) { - res, err := Read(testInputs(t, filepath.Join("..", "..", "test", "testdata", "sdn.csv"))) - require.NoError(t, err) - - var sdn *SDN - for i := range res.SDNs { - if res.SDNs[i].EntityID == "15102" { - sdn = res.SDNs[i] - } - } - require.NotNil(t, sdn) - - e := ToEntity(*sdn) - require.Equal(t, "MORENO, Daniel", e.Name) - require.Equal(t, search.EntityPerson, e.Type) - require.Equal(t, search.SourceUSOFAC, e.Source) - - require.NotNil(t, e.Person) - require.Equal(t, "MORENO, Daniel", e.Person.Name) - require.Equal(t, "", string(e.Person.Gender)) - require.Equal(t, "1972-10-12T00:00:00Z", e.Person.BirthDate.Format(time.RFC3339)) - require.Nil(t, e.Person.DeathDate) - require.Len(t, e.Person.GovernmentIDs, 0) - - require.Nil(t, e.Business) - require.Nil(t, e.Organization) - require.Nil(t, e.Aircraft) - require.Nil(t, e.Vessel) - - require.Equal(t, "15102", e.SourceData.EntityID) -} - -func TestMapper__Vessel(t *testing.T) { - res, err := Read(testInputs(t, filepath.Join("..", "..", "test", "testdata", "sdn.csv"))) - require.NoError(t, err) - - var sdn *SDN - for i := range res.SDNs { - if 
res.SDNs[i].EntityID == "15036" { - sdn = res.SDNs[i] - } - } - require.NotNil(t, sdn) - - e := ToEntity(*sdn) - require.Equal(t, "ARTAVIL", e.Name) - require.Equal(t, search.EntityVessel, e.Type) - require.Equal(t, search.SourceUSOFAC, e.Source) - - require.Nil(t, e.Person) - require.Nil(t, e.Business) - require.Nil(t, e.Organization) - require.Nil(t, e.Aircraft) - require.NotNil(t, e.Vessel) - - require.Equal(t, "ARTAVIL", e.Vessel.Name) - require.Equal(t, "Malta", e.Vessel.Flag) - require.Equal(t, "9187629", e.Vessel.IMONumber) - require.Equal(t, "572469210", e.Vessel.MMSI) - - require.Equal(t, "15036", e.SourceData.EntityID) -} - -func TestMapper__Aircraft(t *testing.T) { - res, err := Read(testInputs(t, filepath.Join("..", "..", "test", "testdata", "sdn.csv"))) - require.NoError(t, err) - - var sdn *SDN - for i := range res.SDNs { - if res.SDNs[i].EntityID == "18158" { - sdn = res.SDNs[i] - } - } - require.NotNil(t, sdn) - - e := ToEntity(*sdn) - require.Equal(t, "MSN 550", e.Name) - require.Equal(t, search.EntityAircraft, e.Type) - require.Equal(t, search.SourceUSOFAC, e.Source) - - require.Nil(t, e.Person) - require.Nil(t, e.Business) - require.Nil(t, e.Organization) - require.NotNil(t, e.Aircraft) - require.Nil(t, e.Vessel) - - require.Equal(t, "MSN 550", e.Aircraft.Name) - require.Equal(t, "1995-01-01", e.Aircraft.Built.Format(time.DateOnly)) - require.Equal(t, "Airbus A321-131", e.Aircraft.Model) - require.Equal(t, "550", e.Aircraft.SerialNumber) - - require.Equal(t, "18158", e.SourceData.EntityID) -} - func TestParseTime(t *testing.T) { t.Run("DOB", func(t *testing.T) { tt, _ := parseTime(dobPatterns, "01 Apr 1950") @@ -148,3 +55,306 @@ func TestParseTime(t *testing.T) { require.Equal(t, "1979-01-01", tt.Format(time.DateOnly)) }) } + +func TestParseGovernmentIDs(t *testing.T) { + tests := []struct { + name string + remarks []string + want []search.GovernmentID + }{ + { + name: "passport only", + remarks: []string{ + "Passport A123456 (Iran) expires 
2024", + }, + want: []search.GovernmentID{ + { + Type: search.GovernmentIDPassport, + Country: "Iran", + Identifier: "A123456", + }, + }, + }, + { + name: "drivers license only", + remarks: []string{ + "Driver's License No. 04900377 (Moldova) issued 02 Jul 2004", + }, + want: []search.GovernmentID{ + { + Type: search.GovernmentIDDriversLicense, + Country: "Moldova", + Identifier: "04900377", + }, + }, + }, + { + name: "multiple IDs", + remarks: []string{ + "Passport A123456 (Iran) expires 2024", + "Driver's License No. 04900377 (Moldova) issued 02 Jul 2004", + }, + want: []search.GovernmentID{ + { + Type: search.GovernmentIDPassport, + Country: "Iran", + Identifier: "A123456", + }, + { + Type: search.GovernmentIDDriversLicense, + Country: "Moldova", + Identifier: "04900377", + }, + }, + }, + { + name: "various driver license formats", + remarks: []string{ + "Driver License M600161650080 (United States)", + "Drivers License No. B-12345 (Canada)", + "Driver's License Number 987654321 (Mexico)", + }, + want: []search.GovernmentID{ + { + Type: search.GovernmentIDDriversLicense, + Country: "United States", + Identifier: "M600161650080", + }, + { + Type: search.GovernmentIDDriversLicense, + Country: "Canada", + Identifier: "12345", + }, + { + Type: search.GovernmentIDDriversLicense, + Country: "Mexico", + Identifier: "987654321", + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseGovernmentIDs(tt.remarks) + require.Equal(t, tt.want, got) + }) + } +} + +func TestParseRemarks(t *testing.T) { + tests := []struct { + name string + remarks []string + wantAffiliations []search.Affiliation + wantSanctions *search.SanctionsInfo + wantHistorical []search.HistoricalInfo + wantTitles []string + }{ + { + name: "complete remarks", + remarks: []string{ + "Linked To: ISLAMIC REVOLUTIONARY GUARD CORPS (IRGC)-QODS FORCE; " + + "Subsidiary Of: BANK OF IRAN; " + + "Additional Sanctions Information - Subject to Secondary Sanctions; " + + 
"Former Name: TEHRAN BANK; Former Name: GLORY; " + + "Title: Director; Title: Board Member", + }, + wantAffiliations: []search.Affiliation{ + { + EntityName: "ISLAMIC REVOLUTIONARY GUARD CORPS (IRGC)-QODS FORCE", + Type: "Linked To", + }, + { + EntityName: "BANK OF IRAN", + Type: "Subsidiary Of", + }, + }, + wantSanctions: &search.SanctionsInfo{ + Description: "Subject to Secondary Sanctions", + Secondary: true, + }, + wantHistorical: []search.HistoricalInfo{ + { + Type: "Former Name", + Value: "TEHRAN BANK", + }, + { + Type: "Former Name", + Value: "GLORY", + }, + }, + wantTitles: []string{ + "Director", + "Board Member", + }, + }, + { + name: "deduplication test", + remarks: []string{ + "Linked To: CORP A; Linked To: CORP A", // Duplicate affiliation + "Former Name: OLD NAME; Former Name: OLD NAME", // Duplicate historical + "Title: CEO; Title: CEO", // Duplicate title + }, + wantAffiliations: []search.Affiliation{ + { + EntityName: "CORP A", + Type: "Linked To", + }, + }, + wantSanctions: nil, + wantHistorical: []search.HistoricalInfo{ + { + Type: "Former Name", + Value: "OLD NAME", + }, + }, + wantTitles: []string{"CEO"}, + }, + { + name: "multiple relationships", + remarks: []string{ + "Linked To: CORP A; Controlled By: CORP B; Owned By: CORP C", + }, + wantAffiliations: []search.Affiliation{ + { + EntityName: "CORP A", + Type: "Linked To", + }, + { + EntityName: "CORP B", + Type: "Subsidiary Of", + }, + { + EntityName: "CORP C", + Type: "Subsidiary Of", + }, + }, + wantSanctions: nil, + wantHistorical: nil, + wantTitles: nil, + }, + { + name: "empty remarks", + remarks: []string{}, + wantAffiliations: nil, + wantSanctions: nil, + wantHistorical: nil, + wantTitles: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotAff, gotSanc, gotHist, gotTitles := parseRemarks(tt.remarks) + + // Sort affiliations for consistent comparison + sort.Slice(gotAff, func(i, j int) bool { + if gotAff[i].Type != gotAff[j].Type { + return 
gotAff[i].Type < gotAff[j].Type + } + return gotAff[i].EntityName < gotAff[j].EntityName + }) + sort.Slice(tt.wantAffiliations, func(i, j int) bool { + if tt.wantAffiliations[i].Type != tt.wantAffiliations[j].Type { + return tt.wantAffiliations[i].Type < tt.wantAffiliations[j].Type + } + return tt.wantAffiliations[i].EntityName < tt.wantAffiliations[j].EntityName + }) + + // Test affiliations + require.Equal(t, tt.wantAffiliations, gotAff) + + // Test sanctions info + if tt.wantSanctions == nil { + require.Nil(t, gotSanc) + } else { + require.Equal(t, tt.wantSanctions.Description, gotSanc.Description) + require.Equal(t, tt.wantSanctions.Secondary, gotSanc.Secondary) + } + + // Sort historical info + sort.Slice(gotHist, func(i, j int) bool { + return gotHist[i].Value < gotHist[j].Value + }) + sort.Slice(tt.wantHistorical, func(i, j int) bool { + return tt.wantHistorical[i].Value < tt.wantHistorical[j].Value + }) + + // Test historical info + require.Equal(t, tt.wantHistorical, gotHist) + + // Sort titles + sort.Strings(gotTitles) + sort.Strings(tt.wantTitles) + + // Test titles + require.Equal(t, tt.wantTitles, gotTitles) + }) + } +} + +func TestParseCryptoAddresses(t *testing.T) { + tests := []struct { + name string + remarks []string + want []search.CryptoAddress + }{ + { + name: "single address", + remarks: []string{ + "Digital Currency Address - XBT bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4", + }, + want: []search.CryptoAddress{ + { + Currency: "XBT", + Address: "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4", + }, + }, + }, + { + name: "multiple addresses", + remarks: []string{ + "Digital Currency Address - XBT bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4", + "Digital Currency Address - ETH 0xb794f5ea0ba39494ce839613fffba74279579268", + }, + want: []search.CryptoAddress{ + { + Currency: "XBT", + Address: "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4", + }, + { + Currency: "ETH", + Address: "0xb794f5ea0ba39494ce839613fffba74279579268", + }, + }, + }, + { + name: 
"duplicate addresses", + remarks: []string{ + "Digital Currency Address - XBT bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4", + "Digital Currency Address - XBT bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4", + }, + want: []search.CryptoAddress{ + { + Currency: "XBT", + Address: "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4", + }, + }, + }, + { + name: "no addresses", + remarks: []string{ + "Some other remark", + }, + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseCryptoAddresses(tt.remarks) + require.Equal(t, tt.want, got) + }) + } +} diff --git a/pkg/ofac/mapper_vehicles_test.go b/pkg/ofac/mapper_vehicles_test.go new file mode 100644 index 00000000..8654e0f9 --- /dev/null +++ b/pkg/ofac/mapper_vehicles_test.go @@ -0,0 +1,196 @@ +package ofac + +import ( + "path/filepath" + "testing" + "time" + + "github.com/moov-io/watchman/pkg/search" + + "github.com/stretchr/testify/require" +) + +func TestMapper__Vessel(t *testing.T) { + res, err := Read(testInputs(t, filepath.Join("..", "..", "test", "testdata", "sdn.csv"))) + require.NoError(t, err) + + var sdn *SDN + for i := range res.SDNs { + if res.SDNs[i].EntityID == "15036" { + sdn = res.SDNs[i] + } + } + require.NotNil(t, sdn) + + e := ToEntity(*sdn) + require.Equal(t, "ARTAVIL", e.Name) + require.Equal(t, search.EntityVessel, e.Type) + require.Equal(t, search.SourceUSOFAC, e.Source) + + require.Nil(t, e.Person) + require.Nil(t, e.Business) + require.Nil(t, e.Organization) + require.Nil(t, e.Aircraft) + require.NotNil(t, e.Vessel) + + require.Equal(t, "ARTAVIL", e.Vessel.Name) + require.Equal(t, "Malta", e.Vessel.Flag) + require.Equal(t, "9187629", e.Vessel.IMONumber) + require.Equal(t, "572469210", e.Vessel.MMSI) + + require.Equal(t, "15036", e.SourceData.EntityID) +} + +func TestMapper__Aircraft(t *testing.T) { + res, err := Read(testInputs(t, filepath.Join("..", "..", "test", "testdata", "sdn.csv"))) + require.NoError(t, err) + + var sdn *SDN + for i := range res.SDNs 
{ + if res.SDNs[i].EntityID == "18158" { + sdn = res.SDNs[i] + } + } + require.NotNil(t, sdn) + + e := ToEntity(*sdn) + require.Equal(t, "MSN 550", e.Name) + require.Equal(t, search.EntityAircraft, e.Type) + require.Equal(t, search.SourceUSOFAC, e.Source) + + require.Nil(t, e.Person) + require.Nil(t, e.Business) + require.Nil(t, e.Organization) + require.NotNil(t, e.Aircraft) + require.Nil(t, e.Vessel) + + require.Equal(t, "MSN 550", e.Aircraft.Name) + require.Equal(t, "1995-01-01", e.Aircraft.Built.Format(time.DateOnly)) + require.Equal(t, "Airbus A321-131", e.Aircraft.Model) + require.Equal(t, "550", e.Aircraft.SerialNumber) + + require.Equal(t, "18158", e.SourceData.EntityID) +} + +func TestMapper__CompleteVessel(t *testing.T) { + sdn := &SDN{ + EntityID: "67890", + SDNName: "CARGO VESSEL X", + SDNType: "vessel", + Remarks: "Vessel Type Cargo; Flag Malta; IMO 9999999; MMSI 123456789; Tonnage 50,000", + } + + e := ToEntity(*sdn) + require.Equal(t, "CARGO VESSEL X", e.Name) + require.Equal(t, search.EntityVessel, e.Type) + require.Equal(t, search.SourceUSOFAC, e.Source) + + require.NotNil(t, e.Vessel) + require.Equal(t, "CARGO VESSEL X", e.Vessel.Name) + require.Equal(t, search.VesselTypeCargo, e.Vessel.Type) + require.Equal(t, "Malta", e.Vessel.Flag) + require.Equal(t, "9999999", e.Vessel.IMONumber) + require.Equal(t, "123456789", e.Vessel.MMSI) + require.Equal(t, 50000, e.Vessel.Tonnage) + + // Verify other entity types are nil + require.Nil(t, e.Person) + require.Nil(t, e.Business) + require.Nil(t, e.Organization) + require.Nil(t, e.Aircraft) +} + +func TestMapper__CompleteAircraft(t *testing.T) { + sdn := &SDN{ + EntityID: "54321", + SDNName: "AIRCRAFT Y", + SDNType: "aircraft", + Remarks: "Aircraft Type Cargo; Flag United States; Aircraft Model Boeing 747; Manufacture Date 01 Jan 1995; Serial Number (MSN) 12345; ICAO Code B744", + } + + e := ToEntity(*sdn) + require.Equal(t, "AIRCRAFT Y", e.Name) + require.Equal(t, search.EntityAircraft, e.Type) + 
require.Equal(t, search.SourceUSOFAC, e.Source) + + require.NotNil(t, e.Aircraft) + require.Equal(t, "AIRCRAFT Y", e.Aircraft.Name) + require.Equal(t, search.AircraftCargo, e.Aircraft.Type) + require.Equal(t, "United States", e.Aircraft.Flag) + require.Equal(t, "Boeing 747", e.Aircraft.Model) + require.Equal(t, "1995-01-01", e.Aircraft.Built.Format(time.DateOnly)) + require.Equal(t, "12345", e.Aircraft.SerialNumber) + require.Equal(t, "B744", e.Aircraft.ICAOCode) + + // Verify other entity types are nil + require.Nil(t, e.Person) + require.Nil(t, e.Business) + require.Nil(t, e.Organization) + require.Nil(t, e.Vessel) +} + +func TestNormalizeVesselType(t *testing.T) { + tests := []struct { + input string + expected search.VesselType + }{ + {"cargo", search.VesselTypeCargo}, + {"Cargo", search.VesselTypeCargo}, + {"CARGO", search.VesselTypeCargo}, + {"unknown", search.VesselTypeUnknown}, + {"", search.VesselTypeUnknown}, + } + + for _, tt := range tests { + result := normalizeVesselType(tt.input) + require.Equal(t, tt.expected, result) + } +} + +func TestNormalizeAircraftType(t *testing.T) { + tests := []struct { + input string + expected search.AircraftType + }{ + {"cargo", search.AircraftCargo}, + {"Cargo", search.AircraftCargo}, + {"CARGO", search.AircraftCargo}, + {"unknown", search.AircraftTypeUnknown}, + {"", search.AircraftTypeUnknown}, + } + + for _, tt := range tests { + result := normalizeAircraftType(tt.input) + require.Equal(t, tt.expected, result) + } +} + +func TestMapper__CompleteVesselWithAllFields(t *testing.T) { + sdn := &SDN{ + EntityID: "67890", + SDNName: "CARGO VESSEL X", + SDNType: "vessel", + CallSign: "ABC123", + GrossRegisteredTonnage: "25,000", + VesselOwner: "SHIPPING CORP", + Remarks: "Vessel Type Cargo; Flag Malta; IMO 9999999; MMSI 123456789; Tonnage 50,000", + } + + e := ToEntity(*sdn) + require.Equal(t, "CARGO VESSEL X", e.Name) + require.Equal(t, search.EntityVessel, e.Type) + require.Equal(t, search.SourceUSOFAC, e.Source) + + 
require.NotNil(t, e.Vessel) + require.Equal(t, "CARGO VESSEL X", e.Vessel.Name) + require.Equal(t, search.VesselTypeCargo, e.Vessel.Type) + require.Equal(t, "Malta", e.Vessel.Flag) + require.Equal(t, "9999999", e.Vessel.IMONumber) + require.Equal(t, "123456789", e.Vessel.MMSI) + require.Equal(t, 50000, e.Vessel.Tonnage) + + // Test new fields + require.Equal(t, "ABC123", e.Vessel.CallSign) + require.Equal(t, 25000, e.Vessel.GrossRegisteredTonnage) + require.Equal(t, "SHIPPING CORP", e.Vessel.Owner) +} diff --git a/pkg/search/models.go b/pkg/search/models.go index 861e8673..22bdee5b 100644 --- a/pkg/search/models.go +++ b/pkg/search/models.go @@ -21,6 +21,11 @@ type Entity[T any] struct { Addresses []Address `json:"addresses"` + Affiliations []Affiliation `json:"affiliations"` + SanctionsInfo *SanctionsInfo `json:"sanctionsInfo"` + HistoricalInfo []HistoricalInfo `json:"historicalInfo"` + Titles []string `json:"titles"` + SourceData T `json:"sourceData"` // Contains all original list data with source list naming } @@ -49,6 +54,7 @@ type Person struct { Gender Gender `json:"gender"` BirthDate *time.Time `json:"birthDate"` DeathDate *time.Time `json:"deathDate"` + Titles []string `json:"titles"` GovernmentIDs []GovernmentID `json:"governmentIDs"` } @@ -85,7 +91,21 @@ type GovernmentID struct { type GovernmentIDType string var ( - GovernmentIDPassport GovernmentIDType = "passport" + GovernmentIDPassport GovernmentIDType = "passport" + GovernmentIDDriversLicense GovernmentIDType = "drivers-license" + GovernmentIDNational GovernmentIDType = "national-id" + GovernmentIDTax GovernmentIDType = "tax-id" + GovernmentIDSSN GovernmentIDType = "ssn" + GovernmentIDCedula GovernmentIDType = "cedula" + GovernmentIDCURP GovernmentIDType = "curp" + GovernmentIDCUIT GovernmentIDType = "cuit" + GovernmentIDElectoral GovernmentIDType = "electoral" + GovernmentIDBusinessRegisration GovernmentIDType = "business-registration" + GovernmentIDCommercialRegistry GovernmentIDType = 
"commercial-registry" + GovernmentIDBirthCert GovernmentIDType = "birth-certificate" + GovernmentIDRefugee GovernmentIDType = "refugee-id" + GovernmentIDDiplomaticPass GovernmentIDType = "diplomatic-passport" + GovernmentIDPersonalID GovernmentIDType = "personal-id" ) type Business struct { @@ -136,14 +156,17 @@ var ( // // TODO(adam): https://www.opensanctions.org/reference/#schema.Vessel type Vessel struct { - Name string `json:"name"` - IMONumber string `json:"imoNumber"` - Type VesselType `json:"type"` - Flag string `json:"flag"` // ISO-3166 // TODO(adam): - Built *time.Time `json:"built"` - Model string `json:"model"` - Tonnage int `json:"tonnage"` - MMSI string `json:"mmsi"` // Maritime Mobile Service Identity + Name string `json:"name"` + IMONumber string `json:"imoNumber"` + Type VesselType `json:"type"` + Flag string `json:"flag"` // ISO-3166 // TODO(adam): + Built *time.Time `json:"built"` + Model string `json:"model"` + Tonnage int `json:"tonnage"` + MMSI string `json:"mmsi"` // Maritime Mobile Service Identity + CallSign string `json:"callSign"` + GrossRegisteredTonnage int `json:"grossRegisteredTonnage"` + Owner string `json:"owner"` } type VesselType string @@ -176,3 +199,21 @@ type Address struct { Latitude float64 `json:"latitude"` Longitude float64 `json:"longitude"` } + +type Affiliation struct { + EntityName string `json:"entityName"` + Type string `json:"type"` // e.g., "Linked To", "Subsidiary Of", "Owned By" + Details string `json:"details,omitempty"` +} + +type SanctionsInfo struct { + Programs []string `json:"programs"` // e.g., "SDGT", "IRGC" + Secondary bool `json:"secondary"` // Subject to secondary sanctions + Description string `json:"description"` // Additional details +} + +type HistoricalInfo struct { + Type string `json:"type"` // e.g., "Former Name", "Previous Flag" + Value string `json:"value"` // The historical value + Date time.Time `json:"date,omitempty"` +} From 5785fe210cf2b817ae5e85d5196d636b9718e83d Mon Sep 17 00:00:00 2001 
From: Adam Shannon Date: Wed, 18 Dec 2024 10:31:09 -0600 Subject: [PATCH 10/26] address: use libpostal to parse addresses --- go.mod | 1 + go.sum | 2 + pkg/address/address.go | 1 + pkg/address/address_test.go | 70 ++++++++++++++++++++++++++++ pkg/address/address_unix.go | 93 +++++++++++++++++++++++++++++++++++++ pkg/ofac/mapper.go | 68 +++++++++++++++++++++------ 6 files changed, 220 insertions(+), 15 deletions(-) create mode 100644 pkg/address/address.go create mode 100644 pkg/address/address_test.go create mode 100644 pkg/address/address_unix.go diff --git a/go.mod b/go.mod index 19dbb662..07489416 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/jaswdr/faker v1.19.1 github.com/knieriem/odf v0.1.0 github.com/moov-io/base v0.48.2 + github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519 github.com/pariz/gountries v0.1.6 github.com/prometheus/client_golang v1.17.0 github.com/stretchr/testify v1.8.4 diff --git a/go.sum b/go.sum index 1c0e32c1..c2eb583e 100644 --- a/go.sum +++ b/go.sum @@ -99,6 +99,8 @@ github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvls github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k= github.com/moov-io/base v0.48.2 h1:BPSNgmwokOVaVzAMJg71L48LCrDYelMfVXJEiZb2zOY= github.com/moov-io/base v0.48.2/go.mod h1:u1/WC3quR6otC9NrM1TtXSwNti1A/m7MR49RIXY1ee4= +github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519 h1:xZ0ZhxCnrs2zaBBvGIHQqzoeXjzctJP61r+aX3QjXhQ= +github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519/go.mod h1:Ycrd7XnwQdumHzpB/6WEa85B4WNdbLC6Wz4FAQNkaV0= github.com/pariz/gountries v0.1.6 h1:Cu8sBSvD6HvAtzinKJ7Yw8q4wAF2dD7oXjA5yDJQt1I= github.com/pariz/gountries v0.1.6/go.mod h1:Et5QWMc75++5nUKSYKNtz/uc+2LHl4LKhNd6zwdTu+0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= diff --git a/pkg/address/address.go b/pkg/address/address.go new file mode 100644 index 
00000000..855782dd --- /dev/null +++ b/pkg/address/address.go @@ -0,0 +1 @@ +package address diff --git a/pkg/address/address_test.go b/pkg/address/address_test.go new file mode 100644 index 00000000..ab44397d --- /dev/null +++ b/pkg/address/address_test.go @@ -0,0 +1,70 @@ +package address + +import ( + "fmt" + "testing" + + "github.com/moov-io/watchman/pkg/search" + + postal "github.com/openvenues/gopostal/parser" + "github.com/stretchr/testify/require" +) + +func TestParseAddress(t *testing.T) { + cases := []struct { + input string + expected search.Address + }{ + { + input: "101 Maple Street Apt 202 Bigcity, New York 11222", + expected: search.Address{ + Line1: "101 maple street", + Line2: "apt 202", + City: "bigcity", + PostalCode: "11222", + State: "new york", + }, + }, + } + for _, tc := range cases { + name := fmt.Sprintf("%#v", tc.expected) + + t.Run(name, func(t *testing.T) { + got := ParseAddress(tc.input) + require.Equal(t, tc.expected, got) + }) + } +} + +func TestOrganizeLibpostalComponents(t *testing.T) { + cases := []struct { + parts []postal.ParsedComponent + expected search.Address + }{ + { + parts: []postal.ParsedComponent{ + {Label: "house_number", Value: "101"}, + {Label: "road", Value: "Main Street"}, + {Label: "city", Value: "Springfield"}, + {Label: "state", Value: "Illinois"}, + {Label: "postcode", Value: "62704"}, + {Label: "country", Value: "United States"}, + }, + expected: search.Address{ + Line1: "101 Main Street", + City: "Springfield", + PostalCode: "62704", + State: "Illinois", + Country: "United States", + }, + }, + } + for _, tc := range cases { + name := fmt.Sprintf("%#v", tc.expected) + + t.Run(name, func(t *testing.T) { + got := organizeLibpostalComponents(tc.parts) + require.Equal(t, tc.expected, got) + }) + } +} diff --git a/pkg/address/address_unix.go b/pkg/address/address_unix.go new file mode 100644 index 00000000..6cff2424 --- /dev/null +++ b/pkg/address/address_unix.go @@ -0,0 +1,93 @@ +package address + +import ( + 
"strings" + + "github.com/moov-io/watchman/pkg/search" + + postal "github.com/openvenues/gopostal/parser" +) + +func ParseAddress(input string) search.Address { + parts := postal.ParseAddress(input) + + return organizeLibpostalComponents(parts) +} + +func organizeLibpostalComponents(parsed []postal.ParsedComponent) search.Address { + // Convert the slice of ParsedComponents into a map for easy access + components := make(map[string]string) + for _, c := range parsed { + components[c.Label] = c.Value + } + + var addr search.Address + + var houseParts []string + var line2Parts []string + + // If building name (house) is present, include it in line1 + if val, ok := components["house"]; ok && val != "" { + houseParts = append(houseParts, val) + } + + // Add house_number + road to line1 + if val, ok := components["house_number"]; ok && val != "" { + houseParts = append(houseParts, val) + } + if val, ok := components["road"]; ok && val != "" { + houseParts = append(houseParts, val) + } + + addr.Line1 = joinNonEmpty(houseParts, " ") + + // Append unit, level, staircase, entrance to line2 if present + secondaryLabels := []string{"unit", "level", "staircase", "entrance"} + for _, label := range secondaryLabels { + if val, ok := components[label]; ok && val != "" { + line2Parts = append(line2Parts, val) + } + } + addr.Line2 = joinNonEmpty(line2Parts, ", ") + + // City: prefer city, if not present fallback to city_district or suburb + if val, ok := components["city"]; ok && val != "" { + addr.City = val + } else if val, ok := components["city_district"]; ok && val != "" { + addr.City = val + } else if val, ok := components["suburb"]; ok && val != "" { + addr.City = val + } + + // PostalCode + if val, ok := components["postcode"]; ok && val != "" { + addr.PostalCode = val + } + + // State: prefer state if present, else state_district + if val, ok := components["state"]; ok && val != "" { + addr.State = val + } else if val, ok := components["state_district"]; ok && val != "" { 
+ addr.State = val + } + + // Country + if val, ok := components["country"]; ok && val != "" { + addr.Country = val + } + + // Latitude/Longitude not provided by libpostal parsing + // If you have them from another source, set them here. + + return addr +} + +func joinNonEmpty(parts []string, sep string) string { + var nonEmpty []string + for _, p := range parts { + if p != "" { + nonEmpty = append(nonEmpty, p) + } + } + return strings.Join(nonEmpty, sep) +} diff --git a/pkg/ofac/mapper.go b/pkg/ofac/mapper.go index f8468007..6dcd7f1d 100644 --- a/pkg/ofac/mapper.go +++ b/pkg/ofac/mapper.go @@ -6,6 +6,7 @@ import ( "strings" "time" + "github.com/moov-io/watchman/pkg/address" "github.com/moov-io/watchman/pkg/search" ) @@ -212,6 +213,14 @@ func ToEntity(sdn SDN) search.Entity[SDN] { return out } +func parseAddresses(inputs []string) []search.Address { + out := make([]search.Address, len(inputs)) + for i := range inputs { + out[i] = address.ParseAddress(inputs[i]) + } + return out +} + func parseAltNames(remarks []string) []string { var names []string for _, r := range remarks { @@ -288,21 +297,23 @@ func parseGovernmentIDs(remarks []string) []search.GovernmentID { } for _, r := range remarks { + // Extract country first + countryRaw := extractCountry(r) + country := normalizeCountry(countryRaw) + + // Remove the country from the remark for cleaner ID extraction + remarkWithoutCountry := r + if countryRaw != "" { + remarkWithoutCountry = strings.TrimSpace(strings.ReplaceAll(r, "("+countryRaw+")", "")) + } + for re, idType := range idPatterns { - if matches := re.FindStringSubmatch(r); len(matches) > 1 { - // Clean the identifier by removing trailing punctuation + if matches := re.FindStringSubmatch(remarkWithoutCountry); len(matches) > 1 { identifier := strings.TrimRight(matches[1], ".;,") - // Extract country and dates if present - country := extractCountry(r) - - // Some IDs have issued/expiry dates - we could add these to the GovernmentID struct - // issued := 
extractDate(r, "issued") - // expires := extractDate(r, "expires") - ids = append(ids, search.GovernmentID{ Type: idType, - Country: normalizeCountryCode(country), + Country: country, // Use the extracted and normalized country Identifier: identifier, }) } @@ -312,6 +323,38 @@ func parseGovernmentIDs(remarks []string) []search.GovernmentID { return ids } +func normalizeCountry(country string) string { + // Mapping of common country name variations to standard names + countryMap := map[string]string{ + "USA": "United States", + "U.S.A.": "United States", + "US": "United States", + "U.S.": "United States", + "UK": "United Kingdom", + "U.K.": "United Kingdom", + "UAE": "United Arab Emirates", + "ROK": "South Korea", + "DPRK": "North Korea", + "PRC": "China", + "ROC": "Taiwan", + "россия": "Russia", + "РОССИЯ": "Russia", + "中国": "China", + "日本": "Japan", + "한국": "South Korea", + "España": "Spain", + "ESPAÑA": "Spain", + } + + // First try direct mapping + if normalized, exists := countryMap[strings.ToUpper(strings.TrimSpace(country))]; exists { + return normalized + } + + // If no direct mapping, return original (could be extended with more sophisticated matching) + return country +} + func normalizeCountryCode(country string) string { // TODO: Implement conversion to ISO-3166 return strings.TrimSpace(country) @@ -354,11 +397,6 @@ func parseSerialNumber(remarks []string) string { return "" } -func parseAddresses(remarks []string) []search.Address { - // TODO: Implement address parsing - return nil -} - var ( // Regular expressions for parsing relationships and sanctions linkedToRegex = regexp.MustCompile(`(?i)Linked\s+To:\s+([^;]+)`) From 8af5585699c9dcea34ce81ac132579d813686840 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Wed, 18 Dec 2024 10:31:25 -0600 Subject: [PATCH 11/26] search: fix test after adding Affiliations, SanctionsInfo, HistoricalInfo, Titles --- pkg/search/models.go | 4 +++- pkg/search/models_test.go | 4 ++++ 2 files changed, 7 insertions(+), 1 
deletion(-) diff --git a/pkg/search/models.go b/pkg/search/models.go index 22bdee5b..01783eec 100644 --- a/pkg/search/models.go +++ b/pkg/search/models.go @@ -1,6 +1,8 @@ package search -import "time" +import ( + "time" +) type Entity[T any] struct { Name string `json:"name"` diff --git a/pkg/search/models_test.go b/pkg/search/models_test.go index f6a0ca5f..a0b4667b 100644 --- a/pkg/search/models_test.go +++ b/pkg/search/models_test.go @@ -31,6 +31,10 @@ func TestEntityJSON(t *testing.T) { "vessel": null, "cryptoAddresses": null, "addresses": null, + "affiliations": null, + "sanctionsInfo": null, + "historicalInfo": null, + "titles": null, "sourceData": { "entityID": "12345" } From 28ed9f47971ebfe71d04caab0fa69e139f754cbf Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Wed, 18 Dec 2024 10:37:57 -0600 Subject: [PATCH 12/26] build: add install steps for libpostal and CI --- .github/workflows/codeql.yaml | 3 ++ .github/workflows/fuzz.yml | 3 ++ .github/workflows/go.yml | 9 +++++ .github/workflows/openshift.yml | 3 ++ .github/workflows/release.yml | 6 +++ .gitignore | 2 + makefile | 68 ++++++++++++++++++++++++++++++++- 7 files changed, 93 insertions(+), 1 deletion(-) diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml index 9287a65b..0f83fd27 100644 --- a/.github/workflows/codeql.yaml +++ b/.github/workflows/codeql.yaml @@ -15,6 +15,9 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 + - name: Install deps + run: make install + - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index c074af37..abf5b9a2 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -25,6 +25,9 @@ jobs: with: fetch-depth: 0 + - name: Install deps + run: make install + - name: Fuzz run: | go test ./pkg/usaddress/... 
-fuzz Fuzz -fuzztime 10m diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index d20ad6d6..a6f45116 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -24,6 +24,9 @@ jobs: go-version: stable id: go + - name: Install deps + run: make install + - name: Run Tests (Linux) if: runner.os == 'Linux' run: make check @@ -53,6 +56,9 @@ jobs: run: | choco install -y make mingw + - name: Install deps + run: make install + - name: Run Short Tests (Non-Linux) run: | go test ./... -short @@ -85,6 +91,9 @@ jobs: sudo systemctl stop mono-xsp4.service || true sudo killall mono || true + - name: Install deps + run: make install + - name: Build Frontend run: make build diff --git a/.github/workflows/openshift.yml b/.github/workflows/openshift.yml index 4004de38..03e2aa42 100644 --- a/.github/workflows/openshift.yml +++ b/.github/workflows/openshift.yml @@ -29,6 +29,9 @@ jobs: - name: Check out code into the Go module directory uses: actions/checkout@v3 + - name: Install deps + run: make install + - name: Docker Build if: runner.os == 'Linux' run: make docker-openshift diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a37f074e..6c2f13fd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -21,6 +21,9 @@ jobs: - name: Check out code into the Go module directory uses: actions/checkout@v4 + - name: Install deps + run: make install + - name: Short Tests if: runner.os == 'Linux' env: @@ -74,6 +77,9 @@ jobs: - name: Check out code into the Go module directory uses: actions/checkout@v4 + - name: Install deps + run: make install + - name: Load Release URL File from release job uses: actions/download-artifact@v4 with: diff --git a/.gitignore b/.gitignore index 05d3c8a5..7df91ba1 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,8 @@ openapi-generator*jar *.db +/libpostal/ + webui/build/ webui/node_modules/ diff --git a/makefile b/makefile index 7e28eb4a..76f13150 100644 --- a/makefile +++ 
b/makefile @@ -19,6 +19,72 @@ endif run: CGO_ENABLED=0 go run github.com/moov-io/watchman/cmd/server +# Detect OS +ifeq ($(OS),Windows_NT) + detected_OS := Windows +else + detected_OS := $(shell uname -s) +endif + +# Detect architecture for macOS +ifeq ($(detected_OS),Darwin) + ARCH := $(shell uname -m) + ifeq ($(ARCH),arm64) + CONFIGURE_FLAGS := --datadir=/tmp/libpostal-data --disable-sse2 + else + CONFIGURE_FLAGS := --datadir=/tmp/libpostal-data + endif +else + CONFIGURE_FLAGS := --datadir=/tmp/libpostal-data +endif + +# Installation target +install: +ifeq ($(detected_OS),Windows) + @$(MAKE) install-windows +else ifeq ($(detected_OS),Linux) + @$(MAKE) install-linux +else ifeq ($(detected_OS),Darwin) + @$(MAKE) install-macos +else + @echo "Unsupported operating system: $(detected_OS)" + @exit 1 +endif + +install-linux: + sudo apt-get install -y curl autoconf automake libtool pkg-config + @$(MAKE) install-libpostal + +install-macos: + brew install curl autoconf automake libtool pkg-config + @echo "Detecting architecture: $(ARCH)" +ifeq ($(ARCH),arm64) + @echo "ARM architecture detected (M1/M2). SSE2 will be disabled." +else + @echo "Intel architecture detected. SSE2 optimizations will be enabled." +endif + @$(MAKE) install-libpostal + +install-windows: + pacman -Syu + pacman -S autoconf automake curl git make libtool gcc mingw-w64-x86_64-gcc + @$(MAKE) install-libpostal + +install-libpostal: + @echo "Cloning libpostal repository..." + git clone https://github.com/openvenues/libpostal || true + cd libpostal && \ + ./bootstrap.sh && \ + ./configure $(CONFIGURE_FLAGS) && \ + make -j$(shell nproc || echo 4) && \ + if [ "$(detected_OS)" = "Windows" ]; then \ + make install; \ + else \ + sudo make install; \ + fi + +.PHONY: install install-linux install-macos install-windows install-libpostal + build: build-server build-batchsearch build-watchmantest ifeq ($(OS),Windows_NT) @echo "Skipping webui build on Windows." 
@@ -38,7 +104,7 @@ build-watchmantest: .PHONY: check check: ifeq ($(OS),Windows_NT) - @echo "Skipping checks on Windows, currently unsupported." + go test ./... -short else @wget -O lint-project.sh https://raw.githubusercontent.com/moov-io/infra/master/go/lint-project.sh @chmod +x ./lint-project.sh From e3c6637409cba4fd551b29d8f739e81ae4b0b168 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Wed, 18 Dec 2024 12:00:21 -0600 Subject: [PATCH 13/26] build: remove watchmantest --- .github/workflows/examples.yml | 27 ------ .github/workflows/release.yml | 3 - cmd/watchmantest/Dockerfile | 15 ---- cmd/watchmantest/README.md | 21 ----- cmd/watchmantest/main.go | 147 --------------------------------- cmd/watchmantest/search.go | 108 ------------------------ cmd/watchmantest/values.go | 31 ------- makefile | 9 +- 8 files changed, 2 insertions(+), 359 deletions(-) delete mode 100644 .github/workflows/examples.yml delete mode 100644 cmd/watchmantest/Dockerfile delete mode 100644 cmd/watchmantest/README.md delete mode 100644 cmd/watchmantest/main.go delete mode 100644 cmd/watchmantest/search.go delete mode 100644 cmd/watchmantest/values.go diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml deleted file mode 100644 index 02d8291d..00000000 --- a/.github/workflows/examples.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Examples - -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] - -jobs: - build: - name: Go Build - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ ubuntu-latest ] - steps: - - name: Set up Go 1.x - uses: actions/setup-go@v4 - with: - go-version: stable - id: go - - - name: Check out code into the Go module directory - uses: actions/checkout@v3 - - - name: Docker Build - run: make docker-watchmantest diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6c2f13fd..1ef59875 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -157,9 +157,6 @@ jobs: - name: 
Docker Static run: make docker-static - - name: Docker watchmantest - run: make docker-watchmantest - - name: Docker Push run: |+ echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin diff --git a/cmd/watchmantest/Dockerfile b/cmd/watchmantest/Dockerfile deleted file mode 100644 index df5a51d4..00000000 --- a/cmd/watchmantest/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM golang:1.21-alpine as builder -RUN apk add -U make git -RUN adduser -D -g '' --shell /bin/false moov -WORKDIR /go/src/github.com/moov-io/watchman -COPY . . -RUN go mod download -RUN make build-watchmantest -USER moov - -FROM scratch -COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt -COPY --from=builder /go/src/github.com/moov-io/watchman/bin/watchmantest /bin/watchmantest -COPY --from=builder /etc/passwd /etc/passwd -USER moov -ENTRYPOINT ["/bin/watchmantest"] diff --git a/cmd/watchmantest/README.md b/cmd/watchmantest/README.md deleted file mode 100644 index 9682b60f..00000000 --- a/cmd/watchmantest/README.md +++ /dev/null @@ -1,21 +0,0 @@ -## watchmantest - -`watchmantest` is a cli tool used for testing the Moov Sanction Search service. - -With no arguments the contaier runs tests against the production API. This tool requires an OAuth token provided by github.com/moov-io/api written to the local disk, but running apitest first will write this token. 
- -This tool can be used to query with custom searches: - -``` -$ go install ./cmd/watchmantest -$ watchmantest -local moh -2019/02/14 23:37:44.432334 main.go:44: Starting moov/watchmantest v0.4.1-dev -2019/02/14 23:37:44.432366 main.go:60: [INFO] using http://localhost:8084 for address -2019/02/14 23:37:44.434534 main.go:76: [SUCCESS] ping -2019/02/14 23:37:44.435204 main.go:83: [SUCCESS] last download was: 3h45m58s ago -2019/02/14 23:37:44.440230 main.go:96: [SUCCESS] name search passed, query="moh" -2019/02/14 23:37:44.445473 main.go:118: [SUCCESS] alt name search passed -2019/02/14 23:37:44.449367 main.go:123: [SUCCESS] address search passed -``` - -__watchmantest is not a stable tool. Please contact Moov developers if you intend to use this tool, otherwise we might change the tool (or remove it) without notice.__ diff --git a/cmd/watchmantest/main.go b/cmd/watchmantest/main.go deleted file mode 100644 index 53df6783..00000000 --- a/cmd/watchmantest/main.go +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright 2022 The Moov Authors -// Use of this source code is governed by an Apache License -// license that can be found in the LICENSE file. - -// watchmantest is a cli tool used for testing the Moov Sanction Search service. -// -// With no arguments the contaier runs tests against the production API. -// This tool requires an OAuth token provided by github.com/moov-io/api written -// to the local disk, but running apitest first will write this token. 
-// -// This tool can be used to query with custom searches: -// -// $ go install ./cmd/watchmantest -// $ watchmantest -local moh -// 2019/02/14 23:37:44.432334 main.go:44: Starting moov/watchmantest v0.4.1-dev -// 2019/02/14 23:37:44.432366 main.go:60: [INFO] using http://localhost:8084 for address -// 2019/02/14 23:37:44.434534 main.go:76: [SUCCESS] ping -// 2019/02/14 23:37:44.435204 main.go:83: [SUCCESS] last download was: 3h45m58s ago -// 2019/02/14 23:37:44.440230 main.go:96: [SUCCESS] name search passed, query="moh" -// 2019/02/14 23:37:44.445473 main.go:118: [SUCCESS] alt name search passed -// 2019/02/14 23:37:44.449367 main.go:123: [SUCCESS] address search passed -// -// watchmantest is not a stable tool. Please contact Moov developers if you intend to use this tool, -// otherwise we might change the tool (or remove it) without notice. -package main - -import ( - "context" - "errors" - "flag" - "fmt" - "log" - "os" - "time" - - "github.com/moov-io/watchman" - moov "github.com/moov-io/watchman/client" - "github.com/moov-io/watchman/cmd/internal" - - "github.com/antihax/optional" -) - -var ( - flagApiAddress = flag.String("address", internal.DefaultApiAddress, "Moov API address") - flagLocal = flag.Bool("local", false, "Use local HTTP addresses") - - flagRequestID = flag.String("request-id", "", "Override what is set for the X-Request-ID HTTP header") -) - -func main() { - flag.Parse() - - log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds | log.Lshortfile) - log.Printf("Starting moov/watchmantest %s", watchman.Version) - - conf := internal.Config(*flagApiAddress, *flagLocal) - log.Printf("[INFO] using %s for address", conf.BasePath) - - // Read OAuth token and set on conf - if v := os.Getenv("OAUTH_TOKEN"); v != "" { - conf.AddDefaultHeader("Authorization", fmt.Sprintf("Bearer %s", v)) - } else { - if local := *flagLocal; !local { - log.Fatal("[FAILURE] no OAuth token provided (try adding -local for http://localhost requests)") - } - } - - 
// Setup API client - api, ctx := moov.NewAPIClient(conf), context.TODO() - - // Ping - if err := ping(ctx, api); err != nil { - log.Fatal("[FAILURE] ping Sanction Search") - } else { - log.Println("[SUCCESS] ping") - } - - // Check downloads - if when, err := latestDownload(ctx, api); err != nil || when.IsZero() { - log.Fatalf("[FAILURE] downloads: %v", err) - } else { - log.Printf("[SUCCESS] last download was: %v ago", time.Since(when).Truncate(1*time.Second)) - } - - query := "alh" // string that matches a lot of records - if v := flag.Arg(0); v != "" { - query = v - } - - // Search queries - sdn, err := searchByName(ctx, api, query) - if err != nil { - log.Fatalf("[FAILURE] problem searching SDNs: %v", err) - } else { - log.Printf("[SUCCESS] name search passed, query=%q sdn=%q", query, sdn.EntityID) - } - - // Load alt names and addresses - if err := searchByAltName(ctx, api, query); err != nil { - log.Fatalf("[FAILURE] problem searching Alt Names: %v", err) - } else { - log.Println("[SUCCESS] alt name search passed") - } - if err := searchByAddress(ctx, api, "St"); err != nil { - log.Fatalf("[FAILURE] problem searching addresses: %v", err) - } else { - log.Println("[SUCCESS] address search passed") - } - - // Lookup UI values - if err := getUIValues(ctx, api); err != nil { - log.Fatalf("[FAILURE] problem looking up UI values: %v", err) - } else { - log.Println("[SUCCESS] UI values lookup passed") - } -} - -func ping(ctx context.Context, api *moov.APIClient) error { - resp, err := api.WatchmanApi.Ping(ctx) - if err != nil { - return err - } - resp.Body.Close() - if resp.StatusCode < 200 || resp.StatusCode > 299 { - return fmt.Errorf("ping error (stats code: %d): %v", resp.StatusCode, err) - } - return nil -} - -func latestDownload(ctx context.Context, api *moov.APIClient) (time.Time, error) { - opts := &moov.GetLatestDownloadsOpts{ - Limit: optional.NewInt32(1), - XRequestID: optional.NewString(*flagRequestID), - } - downloads, resp, err := 
api.WatchmanApi.GetLatestDownloads(ctx, opts) - if err != nil { - return time.Time{}, err - } - resp.Body.Close() - if resp.StatusCode < 200 || resp.StatusCode > 299 { - return time.Time{}, fmt.Errorf("download error (stats code: %d): %v", resp.StatusCode, err) - } - if len(downloads) == 0 { - return time.Time{}, errors.New("empty downloads response") - } - return downloads[0].Timestamp, nil -} diff --git a/cmd/watchmantest/search.go b/cmd/watchmantest/search.go deleted file mode 100644 index be4c7f67..00000000 --- a/cmd/watchmantest/search.go +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2022 The Moov Authors -// Use of this source code is governed by an Apache License -// license that can be found in the LICENSE file. - -package main - -import ( - "context" - "errors" - "fmt" - - moov "github.com/moov-io/watchman/client" - - "github.com/antihax/optional" -) - -// searchByName will attempt sanctions searches for the provided name and then load the SDN metadata -// associated to the company/organization or individual. -func searchByName(ctx context.Context, api *moov.APIClient, name string) (*moov.OfacSdn, error) { - opts := &moov.SearchOpts{ - Limit: optional.NewInt32(2), - Name: optional.NewString(name), - XRequestID: optional.NewString(*flagRequestID), - } - - search, resp, err := api.WatchmanApi.Search(ctx, opts) - if err != nil { - return nil, fmt.Errorf("searchByName: %v", err) - } - defer resp.Body.Close() - - if len(search.SDNs) == 0 { - return nil, fmt.Errorf("searchByName: found no SDNs for %q", name) - } - return &search.SDNs[0], nil -} - -// searchByAltName will attempt sanctions searches and retrieval of all alt names associated to the first result -// for the provided altName and error if none are found. 
-func searchByAltName(ctx context.Context, api *moov.APIClient, alt string) error { - opts := &moov.SearchOpts{ - AltName: optional.NewString(alt), - Limit: optional.NewInt32(2), - XRequestID: optional.NewString(*flagRequestID), - } - - search, resp, err := api.WatchmanApi.Search(ctx, opts) - if err != nil { - return fmt.Errorf("searchByAltName: %v", err) - } - defer resp.Body.Close() - - if len(search.AltNames) == 0 { - return fmt.Errorf("searchByAltName: found no AltNames for %q", alt) - } - return getSDNAltNames(ctx, api, search.AltNames[0].EntityID) -} - -// searchByAddress will attempt sanctions searches and retrieval of all addresses associated to the first result -// for the provided address and error if none are found. -func searchByAddress(ctx context.Context, api *moov.APIClient, address string) error { - opts := &moov.SearchOpts{ - Address: optional.NewString(address), - Limit: optional.NewInt32(2), - XRequestID: optional.NewString(*flagRequestID), - } - - search, resp, err := api.WatchmanApi.Search(ctx, opts) - if err != nil { - return fmt.Errorf("searchByAddress: %v", err) - } - defer resp.Body.Close() - - if len(search.Addresses) == 0 { - return fmt.Errorf("searchByAddress: found no Addresses for %q", address) - } - return getSDNAddresses(ctx, api, search.Addresses[0].EntityID) -} - -func getSDNAddresses(ctx context.Context, api *moov.APIClient, id string) error { - addr, resp, err := api.WatchmanApi.GetSDNAddresses(ctx, id, nil) - if err != nil { - return fmt.Errorf("loadAddresses: %v", err) - } - defer resp.Body.Close() - if len(addr) == 0 { - return errors.New("loadAddresses: no Addresses found") - } - if addr[0].EntityID != id { - return fmt.Errorf("loadAddresses: wrong Address: expected %s but got %s", id, addr[0].EntityID) - } - return nil -} - -func getSDNAltNames(ctx context.Context, api *moov.APIClient, id string) error { - alt, resp, err := api.WatchmanApi.GetSDNAltNames(ctx, id, nil) - if err != nil { - return fmt.Errorf("loadAltNames: %v", 
err) - } - defer resp.Body.Close() - if len(alt) == 0 { - return errors.New("loadAltNames: no AltNames found") - } - if alt[0].EntityID != id { - return fmt.Errorf("loadAltNames: wrong AltName: expected %s but got %s", id, alt[0].EntityID) - } - return nil -} diff --git a/cmd/watchmantest/values.go b/cmd/watchmantest/values.go deleted file mode 100644 index b260bf5d..00000000 --- a/cmd/watchmantest/values.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2022 The Moov Authors -// Use of this source code is governed by an Apache License -// license that can be found in the LICENSE file. - -package main - -import ( - "context" - "fmt" - - moov "github.com/moov-io/watchman/client" - - "github.com/antihax/optional" -) - -func getUIValues(ctx context.Context, api *moov.APIClient) error { - req := &moov.GetUIValuesOpts{ - Limit: optional.NewInt32(10), - } - values, resp, err := api.WatchmanApi.GetUIValues(ctx, "sdnType", req) - if err != nil { - return fmt.Errorf("getUIValues: %v", err) - } - resp.Body.Close() - - if len(values) == 0 { - return fmt.Errorf("no values found") - } - - return nil -} diff --git a/makefile b/makefile index 76f13150..8a733b1a 100644 --- a/makefile +++ b/makefile @@ -85,7 +85,7 @@ install-libpostal: .PHONY: install install-linux install-macos install-windows install-libpostal -build: build-server build-batchsearch build-watchmantest +build: build-server build-batchsearch ifeq ($(OS),Windows_NT) @echo "Skipping webui build on Windows." else @@ -147,7 +147,7 @@ else CGO_ENABLED=0 GOOS=$(PLATFORM) go build -o bin/watchman-$(PLATFORM)-amd64 github.com/moov-io/watchman/cmd/server endif -docker: clean docker-hub docker-openshift docker-static docker-watchmantest +docker: clean docker-hub docker-openshift docker-static docker-hub: docker build --pull --build-arg VERSION=${VERSION} -t moov/watchman:$(VERSION) -f Dockerfile . 
@@ -160,10 +160,6 @@ docker-openshift: docker-static: docker build --pull --build-arg VERSION=${VERSION} -t moov/watchman:static -f Dockerfile-static . -docker-watchmantest: - docker build --pull --build-arg VERSION=${VERSION} -t moov/watchmantest:$(VERSION) -f ./cmd/watchmantest/Dockerfile . - docker tag moov/watchmantest:$(VERSION) moov/watchmantest:latest - release: docker AUTHORS go vet ./... go test -coverprofile=cover-$(VERSION).out ./... @@ -173,7 +169,6 @@ release-push: docker push moov/watchman:$(VERSION) docker push moov/watchman:latest docker push moov/watchman:static - docker push moov/watchmantest:$(VERSION) quay-push: docker push quay.io/moov/watchman:$(VERSION) From 0a4a74a9084f32a7a95462e6b340e45ca92210e2 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Wed, 18 Dec 2024 12:00:52 -0600 Subject: [PATCH 14/26] build: enable CGO once again --- makefile | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/makefile b/makefile index 8a733b1a..31d7d922 100644 --- a/makefile +++ b/makefile @@ -17,7 +17,7 @@ endif .PHONY: run build build-server docker release check test run: - CGO_ENABLED=0 go run github.com/moov-io/watchman/cmd/server + go run github.com/moov-io/watchman/cmd/server # Detect OS ifeq ($(OS),Windows_NT) @@ -93,13 +93,10 @@ else endif build-server: - CGO_ENABLED=0 go build -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/server github.com/moov-io/watchman/cmd/server + go build -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/server github.com/moov-io/watchman/cmd/server build-batchsearch: - CGO_ENABLED=0 go build -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/batchsearch github.com/moov-io/watchman/cmd/batchsearch - -build-watchmantest: - CGO_ENABLED=0 go build -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/watchmantest github.com/moov-io/watchman/cmd/watchmantest + go build -ldflags "-X 
github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/batchsearch github.com/moov-io/watchman/cmd/batchsearch .PHONY: check check: @@ -142,9 +139,9 @@ endif dist: clean build ifeq ($(OS),Windows_NT) - CGO_ENABLED=0 GOOS=windows go build -o bin/watchman.exe github.com/moov-io/watchman/cmd/server + GOOS=windows go build -o bin/watchman.exe github.com/moov-io/watchman/cmd/server else - CGO_ENABLED=0 GOOS=$(PLATFORM) go build -o bin/watchman-$(PLATFORM)-amd64 github.com/moov-io/watchman/cmd/server + GOOS=$(PLATFORM) go build -o bin/watchman-$(PLATFORM)-amd64 github.com/moov-io/watchman/cmd/server endif docker: clean docker-hub docker-openshift docker-static From 6af891bebc6c917f050a5404f8cc055baa9afe50 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Wed, 18 Dec 2024 12:01:16 -0600 Subject: [PATCH 15/26] docs: minor readme tweaks --- README.md | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index c348c3d5..2d4f12f9 100644 --- a/README.md +++ b/README.md @@ -259,20 +259,11 @@ By design, Watchman **does not persist** (save) any data about the search queri ### Go library -This project uses [Go Modules](https://go.dev/blog/using-go-modules) and Go v1.18 or newer. See [Golang's install instructions](https://golang.org/doc/install) for help setting up Go. You can download the source code and we offer [tagged and released versions](https://github.com/moov-io/watchman/releases/latest) as well. We highly recommend you use a tagged release for production. - -``` -$ git@github.com:moov-io/watchman.git - -# Pull down into the Go Module cache -$ go get -u github.com/moov-io/watchman - -$ go doc github.com/moov-io/watchman/client Search -``` +Watchman offers [several packages for usage as libraries](https://pkg.go.dev/github.com/moov-io/watchman/pkg). ### In-browser Watchman search -Using our [in-browser utility](https://oss.moov.io/watchman/), you can instantly perform advanced Watchman searches. 
Simply fill search fields and generate a detailed report that includes match percentage, alternative names, effective/expiration dates, IDs, addresses, and other useful information. This tool is particularly useful for completing quick searches with the aid of a intuitive interface. +Using the [WebUI](https://moov-io.github.io/watchman/webui/), you can instantly perform advanced OFAC Watchman searches. Simply fill search fields and generate a detailed report that includes match percentage, alternative names, effective/expiration dates, IDs, addresses, and other useful information. This tool is particularly useful for completing quick searches with the aid of an intuitive interface. ## Reporting blocks to OFAC From 03a3d5b8f63283eba0e364cab18e7be8cb5b35b3 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Wed, 18 Dec 2024 12:30:24 -0600 Subject: [PATCH 16/26] build: installing libpostal in docker images --- .dockerignore | 1 + Dockerfile | 10 ++++++---- Dockerfile-openshift | 26 ++++++++++++++++++-------- README.md | 2 +- makefile | 7 ++++--- 5 files changed, 30 insertions(+), 16 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..82c4e456 --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +libpostal/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 0484a8dd..8b67e0aa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,20 @@ -FROM golang:alpine as backend +FROM golang:1.23-bookworm as backend ARG VERSION WORKDIR /src COPY .
/src RUN go mod download -RUN CGO_ENABLED=0 go build -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/server /src/cmd/server +RUN apt-get update && apt-get install -y curl autoconf automake libtool pkg-config +RUN make install +RUN go build -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/server /src/cmd/server -FROM node:21-alpine as frontend +FROM node:22-bookworm as frontend ARG VERSION COPY webui/ /watchman/ WORKDIR /watchman/ RUN npm install --legacy-peer-deps RUN npm run build -FROM alpine:latest +FROM debian:bookworm LABEL maintainer="Moov " COPY --from=backend /src/bin/server /bin/server COPY --from=frontend /watchman/build/ /watchman/ diff --git a/Dockerfile-openshift b/Dockerfile-openshift index b48cf01c..76e8dc9b 100644 --- a/Dockerfile-openshift +++ b/Dockerfile-openshift @@ -1,28 +1,38 @@ -FROM quay.io/fedora/fedora:40-x86_64 as builder +# Stage 1: Install dependencies with root privileges +FROM registry.access.redhat.com/ubi9/go-toolset as builder-deps +USER root +RUN dnf install -y --allowerasing --setopt=tsflags=nodocs \ + curl \ + autoconf \ + automake \ + libtool \ + pkgconfig \ + && dnf clean all + +# Stage 2: Build the application +FROM registry.access.redhat.com/ubi9/go-toolset as builder ARG VERSION -RUN yum install -y git golang make npm wget glibc WORKDIR /opt/app-root/src/ +COPY --from=builder-deps /usr /usr COPY . . 
RUN go mod download +RUN make install RUN VERSION=${VERSION} make build-server +# Stage 3: Frontend build FROM node:21-bookworm as frontend COPY webui/ /watchman/ WORKDIR /watchman/ RUN npm install --legacy-peer-deps RUN npm run build -FROM quay.io/fedora/fedora:40-x86_64 -RUN yum install -y glibc - +# Stage 4: Final stage +FROM registry.access.redhat.com/ubi9/ubi-minimal:9.5-1733767867 ARG VERSION=unknown LABEL maintainer="Moov " LABEL name="watchman" LABEL version=$VERSION - COPY --from=builder /opt/app-root/src/bin/server /bin/server - COPY --from=frontend /watchman/build/ /watchman/ ENV WEB_ROOT=/watchman/ - ENTRYPOINT ["/bin/server"] diff --git a/README.md b/README.md index 2d4f12f9..51563867 100644 --- a/README.md +++ b/README.md @@ -300,7 +300,7 @@ Note: 32-bit platforms have known issues and are not supported. Yes please! Please review our [Contributing guide](CONTRIBUTING.md) and [Code of Conduct](https://github.com/moov-io/ach/blob/master/CODE_OF_CONDUCT.md) to get started! Checkout our [issues for first time contributors](https://github.com/moov-io/watchman/contribute) for something to help out with. -This project uses [Go Modules](https://go.dev/blog/using-go-modules) and Go v1.18 or newer. See [Golang's install instructions](https://golang.org/doc/install) for help setting up Go. You can download the source code and we offer [tagged and released versions](https://github.com/moov-io/watchman/releases/latest) as well. We highly recommend you use a tagged release for production. +Run `make install` to setup [gopostal](https://github.com/openvenues/gopostal) / [libpostal](https://github.com/openvenues/libpostal) for Watchman. 
### Releasing diff --git a/makefile b/makefile index 31d7d922..16d643a2 100644 --- a/makefile +++ b/makefile @@ -38,6 +38,9 @@ else CONFIGURE_FLAGS := --datadir=/tmp/libpostal-data endif +# Detect if we need sudo +SUDO := $(shell if command -v sudo >/dev/null 2>&1 && sudo -n true >/dev/null 2>&1; then echo "sudo"; else echo ""; fi) + # Installation target install: ifeq ($(detected_OS),Windows) @@ -52,12 +55,10 @@ else endif install-linux: - sudo apt-get install -y curl autoconf automake libtool pkg-config @$(MAKE) install-libpostal install-macos: brew install curl autoconf automake libtool pkg-config - @echo "Detecting architecture: $(ARCH)" ifeq ($(ARCH),arm64) @echo "ARM architecture detected (M1/M2). SSE2 will be disabled." else @@ -80,7 +81,7 @@ install-libpostal: if [ "$(detected_OS)" = "Windows" ]; then \ make install; \ else \ - sudo make install; \ + $(SUDO) make install; \ fi .PHONY: install install-linux install-macos install-windows install-libpostal From 246c8af5aa80e7f6310918ed6836d5475a524651 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Wed, 18 Dec 2024 14:34:32 -0600 Subject: [PATCH 17/26] build: get libpostal working in docker images --- Dockerfile | 43 +++++++++++++++++++++++++++++++++++++---- Dockerfile-openshift | 46 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 84 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8b67e0aa..0f45a3e6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,8 +3,22 @@ ARG VERSION WORKDIR /src COPY . 
/src RUN go mod download -RUN apt-get update && apt-get install -y curl autoconf automake libtool pkg-config -RUN make install +RUN apt-get update && apt-get install -y curl autoconf automake libtool pkg-config git + +# Clone and install libpostal +RUN git clone https://github.com/openvenues/libpostal.git /src/libpostal +WORKDIR /src/libpostal +RUN ./bootstrap.sh && \ + ./configure && \ + make -j$(shell nproc) && \ + make install && \ + ldconfig + +# Download libpostal data files +RUN libpostal_data download all /usr/local/share/libpostal + +# Build the application +WORKDIR /src RUN go build -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/server /src/cmd/server FROM node:22-bookworm as frontend @@ -16,12 +30,33 @@ RUN npm run build FROM debian:bookworm LABEL maintainer="Moov " + +# Install required runtime dependencies +RUN apt-get update && \ + apt-get install -y \ + libssl3 \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Create necessary directories +RUN mkdir -p /usr/local/share/libpostal + +# Copy libpostal shared libraries and configuration +COPY --from=backend /usr/local/lib/libpostal.so* /usr/local/lib/ +COPY --from=backend /usr/local/lib/pkgconfig/libpostal.pc /usr/local/lib/pkgconfig/ +COPY --from=backend /usr/local/share/libpostal/ /usr/local/share/libpostal/ + +# Update shared library cache +RUN ldconfig + +# Copy application files COPY --from=backend /src/bin/server /bin/server COPY --from=frontend /watchman/build/ /watchman/ -ENV WEB_ROOT=/watchman/ +ENV WEB_ROOT=/watchman/ +ENV LD_LIBRARY_PATH=/usr/local/lib +ENV LIBPOSTAL_DATA_DIR=/usr/local/share/libpostal EXPOSE 8084 EXPOSE 9094 - ENTRYPOINT ["/bin/server"] diff --git a/Dockerfile-openshift b/Dockerfile-openshift index 76e8dc9b..ff425674 100644 --- a/Dockerfile-openshift +++ b/Dockerfile-openshift @@ -7,16 +7,42 @@ RUN dnf install -y --allowerasing --setopt=tsflags=nodocs \ automake \ libtool \ pkgconfig \ + gcc \ + gcc-c++ \ + make \ + git \ && dnf clean all +# 
Install libpostal with models - Fixed path handling +RUN git clone https://github.com/openvenues/libpostal \ + && cd libpostal \ + && ./bootstrap.sh \ + && ./configure --prefix=/usr/local \ + && make -j4 \ + && make install \ + && mkdir -p /usr/local/share/libpostal \ + && cd src \ + && PATH=$PATH:/usr/local/bin ./libpostal_data download all /usr/local/share/libpostal \ + && chown -R 1001:0 /usr/local \ + && chmod -R g=u /usr/local + # Stage 2: Build the application FROM registry.access.redhat.com/ubi9/go-toolset as builder ARG VERSION WORKDIR /opt/app-root/src/ + +# Copy the entire /usr and /usr/local directories COPY --from=builder-deps /usr /usr +COPY --from=builder-deps /usr/local /usr/local + +# Set environment variables for build +ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig +ENV LD_LIBRARY_PATH=/usr/local/lib + +# Copy source and build COPY . . RUN go mod download -RUN make install +USER 1001 RUN VERSION=${VERSION} make build-server # Stage 3: Frontend build @@ -32,7 +58,25 @@ ARG VERSION=unknown LABEL maintainer="Moov " LABEL name="watchman" LABEL version=$VERSION + +# Install runtime dependencies +USER root +RUN microdnf install -y \ + libstdc++ \ + && microdnf clean all + +# Copy libpostal files and setup +COPY --from=builder-deps /usr/local /usr/local +ENV LD_LIBRARY_PATH=/usr/local/lib + +# Copy application files COPY --from=builder /opt/app-root/src/bin/server /bin/server COPY --from=frontend /watchman/build/ /watchman/ ENV WEB_ROOT=/watchman/ + +# Set final permissions +RUN chown -R 1001:0 /bin/server /watchman \ + && chmod -R g=u /bin/server /watchman + +USER 1001 ENTRYPOINT ["/bin/server"] From 2175b0e3a5660b3c906599685a953fafd6f3afaf Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Wed, 18 Dec 2024 14:34:58 -0600 Subject: [PATCH 18/26] build: remove batchsearch, finish libpostal setup --- .github/workflows/go.yml | 14 --- cmd/batchsearch/README.md | 19 ---- cmd/batchsearch/main.go | 219 -------------------------------------- makefile | 43 ++------ 
4 files changed, 8 insertions(+), 287 deletions(-) delete mode 100644 cmd/batchsearch/README.md delete mode 100644 cmd/batchsearch/main.go diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index a6f45116..a571e2da 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -100,17 +100,3 @@ jobs: - name: Docker Build if: runner.os == 'Linux' run: make docker-hub - - - name: Build batchsearch - if: runner.os == 'Linux' - run: make build-batchsearch - - # - name: Integration Test - # if: runner.os == 'Linux' - # run: make test-integration - - - name: Test Cleanup - if: runner.os == 'Linux' && always() - run: | - docker compose logs - make clean-integration diff --git a/cmd/batchsearch/README.md b/cmd/batchsearch/README.md deleted file mode 100644 index 0877e1c2..00000000 --- a/cmd/batchsearch/README.md +++ /dev/null @@ -1,19 +0,0 @@ -## batchsearch - -`batchsearch` is a cli tool used for testing batches of names against Moov's Watchman service. - -With no arguments the contaier runs tests against the production API, but we strongly ask you run batchsearch against local instances of Watchman. - -``` -$ go install ./cmd/batchsearch -$ batchsearch -allowed-file users.txt -blocked-file criminals.txt -threshold 0.99 -sdn-type individual -v -2019/10/09 17:36:16.160025 main.go:61: Starting moov/batchsearch v0.12.0-dev -2019/10/09 17:36:16.160055 main.go:64: [INFO] using http://localhost:8084 for address -2019/10/09 17:36:16.161818 main.go:73: [SUCCESS] ping -2019/10/09 17:36:16.174108 main.go:156: [INFO] didn't block 'Husein HAZEM' -2019/10/09 17:36:16.212986 main.go:148: [INFO] blocked 'Nicolas Ernesto MADURO GUERRA' -2019/10/09 17:36:16.213423 main.go:146: [ERROR] 'Maria Alexandra PERDOMO' wasn't blocked (match=0.96) -exit status 1 -``` - -__batchsearch is not a stable tool. 
Please contact Moov developers if you intend to use this tool, otherwise we might change the tool (or remove it) without notice.__ diff --git a/cmd/batchsearch/main.go b/cmd/batchsearch/main.go deleted file mode 100644 index bd015097..00000000 --- a/cmd/batchsearch/main.go +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright 2022 The Moov Authors -// Use of this source code is governed by an Apache License -// license that can be found in the LICENSE file. - -// batchsearch is a cli tool used for testing batches of names against Moov's sanctions service. -// -// With no arguments the contaier runs tests against the production API, but we strongly ask you -// run batchsearch against local instances of Watchman. -// -// $ go install ./cmd/batchsearch -// $ batchsearch -allowed-file users.txt -blocked-file criminals.txt -threshold 0.99 -sdn-type individual -v -// 2019/10/09 17:36:16.160025 main.go:61: Starting moov/batchsearch v0.12.0-dev -// 2019/10/09 17:36:16.160055 main.go:64: [INFO] using http://localhost:8084 for address -// 2019/10/09 17:36:16.161818 main.go:73: [SUCCESS] ping -// 2019/10/09 17:36:16.174108 main.go:156: [INFO] didn't block 'Husein HAZEM' -// 2019/10/09 17:36:16.212986 main.go:148: [INFO] blocked 'Nicolas Ernesto MADURO GUERRA' -// 2019/10/09 17:36:16.213423 main.go:146: [ERROR] 'Maria Alexandra PERDOMO' wasn't blocked (match=0.96) -// exit status 1 -// -// batchsearch is not a stable tool. Please contact Moov developers if you intend to use this tool, -// otherwise we might change the tool (or remove it) without notice. 
-package main - -import ( - "bufio" - "context" - "errors" - "flag" - "fmt" - "log" - "os" - "path/filepath" - "runtime" - "strings" - "sync" - "sync/atomic" - "time" - - "github.com/moov-io/watchman" - moov "github.com/moov-io/watchman/client" - "github.com/moov-io/watchman/cmd/internal" - - "github.com/antihax/optional" - "go4.org/syncutil" -) - -var ( - flagApiAddress = flag.String("address", internal.DefaultApiAddress, "Moov API address") - flagLocal = flag.Bool("local", false, "Use local HTTP addresses") - - flagThreshold = flag.Float64("threshold", 0.99, "Minimum match percentage required for blocking") - - flagAllowedFile = flag.String("allowed-file", filepath.Join("test", "testdata", "allowed.txt"), "Filepath to file with names expected to be allowed") - flagBlockedFile = flag.String("blocked-file", filepath.Join("test", "testdata", "blocked.txt"), "Filepath to file with names expected to be blocked") - - flagSdnType = flag.String("sdn-type", "individual", "sdnType query param") - - flagRequestID = flag.String("request-id", "", "Override what is set for the X-Request-ID HTTP header") - - flagVerbose = flag.Bool("v", false, "Enable detailed logging") - - flagWorkers = flag.Int("workers", runtime.NumCPU(), "How many tasks to run concurrently") -) - -func main() { - flag.Parse() - - log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds | log.Lshortfile) - log.Printf("Starting moov/batchsearch %s", watchman.Version) - - conf := internal.Config(*flagApiAddress, *flagLocal) - log.Printf("[INFO] using %s for address", conf.BasePath) - - // Setup API client - api, ctx := moov.NewAPIClient(conf), context.TODO() - - // Ping - if err := ping(ctx, api); err != nil { - log.Fatalf("[FAILURE] ping Sanctions Search: %v", err) - } else { - log.Println("[SUCCESS] ping") - } - - // Perform checks over the two incoming files - if path := *flagAllowedFile; path != "" { - names, err := readNames(path) - if err != nil { - log.Fatalf("[FAILURE] %v", err) - } - if n := 
checkNames(BlockUnexpected, names, *flagThreshold, api); n == Failure { - os.Exit(int(n)) - } - } - if path := *flagBlockedFile; path != "" { - names, err := readNames(path) - if err != nil { - log.Fatalf("[FAILURE] %v", err) - } - if n := checkNames(BlockExpected, names, *flagThreshold, api); n == Failure { - os.Exit(int(n)) - } - } - log.Println("[SUCCESS] all tests passed") -} - -func ping(ctx context.Context, api *moov.APIClient) error { - resp, err := api.WatchmanApi.Ping(ctx) - if err != nil { - return err - } - resp.Body.Close() - if resp.StatusCode < 200 || resp.StatusCode > 299 { - return fmt.Errorf("ping error (stats code: %d): %v", resp.StatusCode, err) - } - return nil -} - -type action int - -var ( - BlockExpected action = 1 - BlockUnexpected action = 2 -) - -var ( - Success int64 = 0 - Failure int64 = 1 -) - -func checkNames(desc action, names []string, threshold float64, api *moov.APIClient) int64 { - var wg sync.WaitGroup - wg.Add(len(names)) - - var exitCode int64 // must be protected with atomic calls - markFailure := func() { - atomic.CompareAndSwapInt64(&exitCode, Success, Failure) // set Failure as exit code - } - - workers := syncutil.NewGate(*flagWorkers) - - for i := range names { - workers.Start() - go func(name string) { - defer workers.Done() - defer wg.Done() - - if match, err := searchByName(api, name); err != nil { - markFailure() - log.Printf("[FATAL] problem searching for '%s': %v", name, err) - } else { - switch desc { - case BlockExpected: - if match < threshold { - markFailure() - log.Printf("[ERROR] '%s' wasn't blocked (match=%.2f)", name, match) - } else { - log.Printf("[INFO] blocked '%s'", name) - } - case BlockUnexpected: - if match > threshold { - markFailure() - log.Printf("[ERROR] '%s' was blocked (match=%.2f)", name, match) - } else { - if *flagVerbose { - log.Printf("[INFO] didn't block '%s'", name) - } - } - } - } - }(names[i]) - } - - wg.Wait() // block until all requests are finished - - return exitCode -} - -func 
readNames(path string) ([]string, error) { - fd, err := os.Open(path) - if err != nil { - return nil, fmt.Errorf("problem reading %s: %v", path, err) - } - defer fd.Close() - - scanner := bufio.NewScanner(fd) - - var names []string - for scanner.Scan() { - name := strings.TrimSpace(scanner.Text()) - if strings.HasPrefix(name, "//") || strings.HasPrefix(name, "#") { - continue - } - names = append(names, name) - } - return names, nil -} - -func searchByName(api *moov.APIClient, name string) (float64, error) { - opts := &moov.SearchOpts{ - Limit: optional.NewInt32(1), - Name: optional.NewString(name), - SdnType: optional.NewInterface(*flagSdnType), - XRequestID: optional.NewString(*flagRequestID), - } - - ctx, cancelFunc := context.WithTimeout(context.TODO(), 5*time.Second) - defer cancelFunc() - - search, resp, err := api.WatchmanApi.Search(ctx, opts) - if err != nil { - return 0.0, fmt.Errorf("searchByName: %v", err) - } - defer resp.Body.Close() - - if len(search.SDNs) == 0 { - return 0.0, errors.New("no SDNs returned") - } - return float64(search.SDNs[0].Match), nil -} diff --git a/makefile b/makefile index 16d643a2..9a359c18 100644 --- a/makefile +++ b/makefile @@ -30,12 +30,10 @@ endif ifeq ($(detected_OS),Darwin) ARCH := $(shell uname -m) ifeq ($(ARCH),arm64) - CONFIGURE_FLAGS := --datadir=/tmp/libpostal-data --disable-sse2 - else - CONFIGURE_FLAGS := --datadir=/tmp/libpostal-data + CONFIGURE_FLAGS := --disable-sse2 endif else - CONFIGURE_FLAGS := --datadir=/tmp/libpostal-data + CONFIGURE_FLAGS := endif # Detect if we need sudo @@ -79,14 +77,16 @@ install-libpostal: ./configure $(CONFIGURE_FLAGS) && \ make -j$(shell nproc || echo 4) && \ if [ "$(detected_OS)" = "Windows" ]; then \ - make install; \ + make install && \ + make download-models; \ else \ - $(SUDO) make install; \ + $(SUDO) make install && \ + $(SUDO) make download-models; \ fi .PHONY: install install-linux install-macos install-windows install-libpostal -build: build-server build-batchsearch 
+build: build-server ifeq ($(OS),Windows_NT) @echo "Skipping webui build on Windows." else @@ -94,10 +94,7 @@ else endif build-server: - go build -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/server github.com/moov-io/watchman/cmd/server - -build-batchsearch: - go build -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/batchsearch github.com/moov-io/watchman/cmd/batchsearch + go build -buildvcs=false -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/server github.com/moov-io/watchman/cmd/server .PHONY: check check: @@ -171,27 +168,3 @@ release-push: quay-push: docker push quay.io/moov/watchman:$(VERSION) docker push quay.io/moov/watchman:latest - -.PHONY: cover-test cover-web -cover-test: - go test -coverprofile=cover.out ./... -cover-web: - go tool cover -html=cover.out - -clean-integration: - docker compose kill && docker compose rm -v -f - -# TODO: this test is working but due to a default timeout on the admin server we get an empty reply -# for now this shouldn't hold up out CI pipeline -test-integration: clean-integration - docker compose up -d - sleep 30 - time curl -v --max-time 30 http://localhost:9094/data/refresh # hangs until download and parsing completes - ./bin/batchsearch -local -threshold 0.95 - -# From https://github.com/genuinetools/img -.PHONY: AUTHORS -AUTHORS: - @$(file >$@,# This file lists all individuals having contributed content to the repository.) - @$(file >>$@,# For how it is generated, see `make AUTHORS`.) 
- @echo "$(shell git log --format='\n%aN <%aE>' | LC_ALL=C.UTF-8 sort -uf)" >> $@ From 5bfb82d0f405824bb2c198a95c79a67e8a77d3bb Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Wed, 18 Dec 2024 16:09:50 -0600 Subject: [PATCH 19/26] build: optimize Dockerfiles for builder cache --- Dockerfile | 47 ++++++++++++++++++++++---------- Dockerfile-openshift | 65 +++++++++++++++++++++++++++++--------------- 2 files changed, 75 insertions(+), 37 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0f45a3e6..67ac3cb5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,18 @@ +# Backend build stage FROM golang:1.23-bookworm as backend ARG VERSION WORKDIR /src -COPY . /src -RUN go mod download -RUN apt-get update && apt-get install -y curl autoconf automake libtool pkg-config git -# Clone and install libpostal +# Install system dependencies first +RUN apt-get update && apt-get install -y \ + curl \ + autoconf \ + automake \ + libtool \ + pkg-config \ + git + +# Clone and build libpostal (rarely changes) RUN git clone https://github.com/openvenues/libpostal.git /src/libpostal WORKDIR /src/libpostal RUN ./bootstrap.sh && \ @@ -14,45 +21,55 @@ RUN ./bootstrap.sh && \ make install && \ ldconfig -# Download libpostal data files +# Download libpostal data (rarely changes) RUN libpostal_data download all /usr/local/share/libpostal -# Build the application +# Copy go.mod and go.sum first to cache dependencies +COPY go.mod go.sum /src/ +RUN go mod download + +# Now copy the rest of the source code (frequently changes) +COPY . 
/src/ WORKDIR /src -RUN go build -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/server /src/cmd/server +RUN VERSION=${VERSION} make build-server +# Frontend build stage FROM node:22-bookworm as frontend ARG VERSION -COPY webui/ /watchman/ WORKDIR /watchman/ + +# Copy package files first to cache dependencies +COPY webui/package*.json webui/ +WORKDIR /watchman/webui/ RUN npm install --legacy-peer-deps + +# Copy and build frontend source (frequently changes) +COPY webui/ ./ RUN npm run build +# Final stage FROM debian:bookworm LABEL maintainer="Moov " -# Install required runtime dependencies +# Install runtime dependencies RUN apt-get update && \ apt-get install -y \ libssl3 \ ca-certificates \ && rm -rf /var/lib/apt/lists/* -# Create necessary directories +# Create necessary directories and copy libpostal files RUN mkdir -p /usr/local/share/libpostal - -# Copy libpostal shared libraries and configuration COPY --from=backend /usr/local/lib/libpostal.so* /usr/local/lib/ COPY --from=backend /usr/local/lib/pkgconfig/libpostal.pc /usr/local/lib/pkgconfig/ COPY --from=backend /usr/local/share/libpostal/ /usr/local/share/libpostal/ - -# Update shared library cache RUN ldconfig # Copy application files COPY --from=backend /src/bin/server /bin/server -COPY --from=frontend /watchman/build/ /watchman/ +COPY --from=frontend /watchman/webui/build/ /watchman/ +# Set environment variables ENV WEB_ROOT=/watchman/ ENV LD_LIBRARY_PATH=/usr/local/lib ENV LIBPOSTAL_DATA_DIR=/usr/local/share/libpostal diff --git a/Dockerfile-openshift b/Dockerfile-openshift index ff425674..0d52516c 100644 --- a/Dockerfile-openshift +++ b/Dockerfile-openshift @@ -1,6 +1,8 @@ # Stage 1: Install dependencies with root privileges FROM registry.access.redhat.com/ubi9/go-toolset as builder-deps USER root + +# Install system dependencies first - rarely changes RUN dnf install -y --allowerasing --setopt=tsflags=nodocs \ curl \ autoconf \ @@ -13,43 +15,62 @@ RUN dnf install -y 
--allowerasing --setopt=tsflags=nodocs \ git \ && dnf clean all -# Install libpostal with models - Fixed path handling -RUN git clone https://github.com/openvenues/libpostal \ - && cd libpostal \ - && ./bootstrap.sh \ - && ./configure --prefix=/usr/local \ - && make -j4 \ - && make install \ - && mkdir -p /usr/local/share/libpostal \ - && cd src \ - && PATH=$PATH:/usr/local/bin ./libpostal_data download all /usr/local/share/libpostal \ - && chown -R 1001:0 /usr/local \ - && chmod -R g=u /usr/local +# Install libpostal with models - rarely changes +RUN git clone https://github.com/openvenues/libpostal && \ + cd libpostal && \ + ./bootstrap.sh && \ + ./configure --prefix=/usr/local && \ + make -j4 && \ + make install && \ + mkdir -p /usr/local/share/libpostal + +# Download libpostal data - separate step for better caching +RUN cd libpostal/src && \ + PATH=$PATH:/usr/local/bin ./libpostal_data download all /usr/local/share/libpostal + +# Set permissions - should be last in this stage +RUN chown -R 1001:0 /usr/local && \ + chmod -R g=u /usr/local # Stage 2: Build the application -FROM registry.access.redhat.com/ubi9/go-toolset as builder +FROM registry.access.redhat.com/ubi9/go-toolset AS builder ARG VERSION WORKDIR /opt/app-root/src/ -# Copy the entire /usr and /usr/local directories -COPY --from=builder-deps /usr /usr +# Copy only the necessary files from builder-deps COPY --from=builder-deps /usr/local /usr/local +COPY --from=builder-deps /usr/lib64 /usr/lib64 # Set environment variables for build ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig ENV LD_LIBRARY_PATH=/usr/local/lib -# Copy source and build -COPY . . +# Copy go.mod and go.sum first to cache dependencies +COPY go.mod go.sum ./ RUN go mod download + +# Copy source files +COPY . . + +# Create bin directory and set permissions BEFORE building +USER root +RUN mkdir -p bin && \ + chown -R 1001:0 . && \ + chmod -R g=u . 
+ USER 1001 RUN VERSION=${VERSION} make build-server # Stage 3: Frontend build FROM node:21-bookworm as frontend -COPY webui/ /watchman/ WORKDIR /watchman/ + +# Copy package files first to cache dependencies +COPY webui/package*.json ./ RUN npm install --legacy-peer-deps + +# Copy frontend source and build - frequently changes +COPY webui/ ./ RUN npm run build # Stage 4: Final stage @@ -74,9 +95,9 @@ COPY --from=builder /opt/app-root/src/bin/server /bin/server COPY --from=frontend /watchman/build/ /watchman/ ENV WEB_ROOT=/watchman/ -# Set final permissions -RUN chown -R 1001:0 /bin/server /watchman \ - && chmod -R g=u /bin/server /watchman - +# Set final permissions and switch to non-root user +RUN chown -R 1001:0 /bin/server /watchman && \ + chmod -R g=u /bin/server /watchman USER 1001 + ENTRYPOINT ["/bin/server"] From 8faae7861eba1ca382693e7ac5ec432db78ec05a Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Wed, 18 Dec 2024 16:41:13 -0600 Subject: [PATCH 20/26] ofac: correct input for v2 address parsing --- pkg/ofac/mapper.go | 11 +++++++---- pkg/ofac/mapper_business_test.go | 4 ++-- pkg/ofac/mapper_person_test.go | 8 ++++---- pkg/ofac/mapper_vehicles_test.go | 10 +++++----- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/pkg/ofac/mapper.go b/pkg/ofac/mapper.go index 6dcd7f1d..9e648fe3 100644 --- a/pkg/ofac/mapper.go +++ b/pkg/ofac/mapper.go @@ -1,6 +1,7 @@ package ofac import ( + "fmt" "regexp" "strconv" "strings" @@ -106,7 +107,7 @@ func extractCountry(remark string) string { return "" } -func ToEntity(sdn SDN) search.Entity[SDN] { +func ToEntity(sdn SDN, addresses []Address, comments []SDNComments) search.Entity[SDN] { out := search.Entity[SDN]{ Name: sdn.SDNName, Source: search.SourceUSOFAC, @@ -123,7 +124,7 @@ func ToEntity(sdn SDN) search.Entity[SDN] { out.CryptoAddresses = parseCryptoAddresses(remarks) // Extract common fields regardless of entity type - out.Addresses = parseAddresses(remarks) + out.Addresses = parseAddresses(addresses) 
switch strings.ToLower(strings.TrimSpace(sdn.SDNType)) { case "-0-", "": @@ -213,10 +214,12 @@ func ToEntity(sdn SDN) search.Entity[SDN] { return out } -func parseAddresses(inputs []string) []search.Address { +func parseAddresses(inputs []Address) []search.Address { out := make([]search.Address, len(inputs)) for i := range inputs { - out[i] = address.ParseAddress(inputs[i]) + addr := fmt.Sprintf("%s %s %s", inputs[i].Address, inputs[i].CityStateProvincePostalCode, inputs[i].Country) + + out[i] = address.ParseAddress(addr) } return out } diff --git a/pkg/ofac/mapper_business_test.go b/pkg/ofac/mapper_business_test.go index 08e293bc..6f71f5c3 100644 --- a/pkg/ofac/mapper_business_test.go +++ b/pkg/ofac/mapper_business_test.go @@ -17,7 +17,7 @@ func TestMapper__CompleteBusiness(t *testing.T) { Remarks: "Business Registration Number 51566843 (Hong Kong); Commercial Registry Number CH-020.1.066.499-9 (Switzerland); Company Number 05527424 (United Kingdom)", } - e := ToEntity(*sdn) + e := ToEntity(*sdn, nil, nil) require.Equal(t, "ACME CORPORATION", e.Name) require.Equal(t, search.EntityBusiness, e.Type) require.Equal(t, search.SourceUSOFAC, e.Source) @@ -60,7 +60,7 @@ func TestMapper__CompleteBusinessWithRemarks(t *testing.T) { Remarks: "Business Registration Number 51566843 (Hong Kong); Subsidiary Of: PARENT CORP; Former Name: OLD ACME LTD; Additional Sanctions Information - Subject to Secondary Sanctions", } - e := ToEntity(*sdn) + e := ToEntity(*sdn, nil, nil) // Test affiliations require.Len(t, e.Affiliations, 1) diff --git a/pkg/ofac/mapper_person_test.go b/pkg/ofac/mapper_person_test.go index 50fae703..53cb80a6 100644 --- a/pkg/ofac/mapper_person_test.go +++ b/pkg/ofac/mapper_person_test.go @@ -22,7 +22,7 @@ func TestMapper__Person(t *testing.T) { } require.NotNil(t, sdn) - e := ToEntity(*sdn) + e := ToEntity(*sdn, nil, nil) require.Equal(t, "MORENO, Daniel", e.Name) require.Equal(t, search.EntityPerson, e.Type) require.Equal(t, search.SourceUSOFAC, e.Source) @@ 
-55,7 +55,7 @@ func TestMapper__CompletePerson(t *testing.T) { Remarks: "DOB 28 Oct 1968; POB Baghdad, Iraq; Additional Sanctions Information - Subject to Secondary Sanctions Pursuant to the Hizballah Financial Sanctions Regulations; alt. Additional Sanctions Information - Subject to Secondary Sanctions; Gender Male; a.k.a. 'SHIBL, Hajji'; nationality Iran; Passport A123456 (Iran) expires 2024; Driver's License No. 04900377 (Moldova) issued 02 Jul 2004; Email Address test@example.com; Phone: +1-123-456-7890; Fax: +1-123-456-7899", } - e := ToEntity(*sdn) + e := ToEntity(*sdn, nil, nil) require.Equal(t, "AL-ZAYDI, Shibl Muhsin 'Ubayd", e.Name) require.Equal(t, search.EntityPerson, e.Type) require.Equal(t, search.SourceUSOFAC, e.Source) @@ -133,7 +133,7 @@ func TestMapper__CompletePersonWithRemarks(t *testing.T) { Remarks: "DOB 28 Oct 1968; POB Baghdad, Iraq; Gender Male; Title: Commander; Former Name: AL-ZAYDI, Muhammad; Linked To: ISLAMIC REVOLUTIONARY GUARD CORPS (IRGC)-QODS FORCE; Additional Sanctions Information - Subject to Secondary Sanctions", } - e := ToEntity(*sdn) + e := ToEntity(*sdn, nil, nil) // Test affiliations require.Len(t, e.Affiliations, 1) @@ -163,7 +163,7 @@ func TestMapper__PersonWithTitle(t *testing.T) { Remarks: "Title: Regional Director", } - e := ToEntity(*sdn) + e := ToEntity(*sdn, nil, nil) require.Equal(t, "SMITH, John", e.Name) require.Equal(t, search.EntityPerson, e.Type) diff --git a/pkg/ofac/mapper_vehicles_test.go b/pkg/ofac/mapper_vehicles_test.go index 8654e0f9..07cde3ca 100644 --- a/pkg/ofac/mapper_vehicles_test.go +++ b/pkg/ofac/mapper_vehicles_test.go @@ -22,7 +22,7 @@ func TestMapper__Vessel(t *testing.T) { } require.NotNil(t, sdn) - e := ToEntity(*sdn) + e := ToEntity(*sdn, nil, nil) require.Equal(t, "ARTAVIL", e.Name) require.Equal(t, search.EntityVessel, e.Type) require.Equal(t, search.SourceUSOFAC, e.Source) @@ -53,7 +53,7 @@ func TestMapper__Aircraft(t *testing.T) { } require.NotNil(t, sdn) - e := ToEntity(*sdn) + e := 
ToEntity(*sdn, nil, nil) require.Equal(t, "MSN 550", e.Name) require.Equal(t, search.EntityAircraft, e.Type) require.Equal(t, search.SourceUSOFAC, e.Source) @@ -80,7 +80,7 @@ func TestMapper__CompleteVessel(t *testing.T) { Remarks: "Vessel Type Cargo; Flag Malta; IMO 9999999; MMSI 123456789; Tonnage 50,000", } - e := ToEntity(*sdn) + e := ToEntity(*sdn, nil, nil) require.Equal(t, "CARGO VESSEL X", e.Name) require.Equal(t, search.EntityVessel, e.Type) require.Equal(t, search.SourceUSOFAC, e.Source) @@ -108,7 +108,7 @@ func TestMapper__CompleteAircraft(t *testing.T) { Remarks: "Aircraft Type Cargo; Flag United States; Aircraft Model Boeing 747; Manufacture Date 01 Jan 1995; Serial Number (MSN) 12345; ICAO Code B744", } - e := ToEntity(*sdn) + e := ToEntity(*sdn, nil, nil) require.Equal(t, "AIRCRAFT Y", e.Name) require.Equal(t, search.EntityAircraft, e.Type) require.Equal(t, search.SourceUSOFAC, e.Source) @@ -176,7 +176,7 @@ func TestMapper__CompleteVesselWithAllFields(t *testing.T) { Remarks: "Vessel Type Cargo; Flag Malta; IMO 9999999; MMSI 123456789; Tonnage 50,000", } - e := ToEntity(*sdn) + e := ToEntity(*sdn, nil, nil) require.Equal(t, "CARGO VESSEL X", e.Name) require.Equal(t, search.EntityVessel, e.Type) require.Equal(t, search.SourceUSOFAC, e.Source) From bbfba4d6ab7c8f4761fe11611bce362ada3d3cfc Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Wed, 18 Dec 2024 16:42:03 -0600 Subject: [PATCH 21/26] cmd/server: wire up basic /v2/search --- cmd/server/main.go | 38 ++++++++++++++++++++++++++++++++++- internal/search/api_search.go | 2 +- internal/search/service.go | 33 +++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/cmd/server/main.go b/cmd/server/main.go index 58802c57..e060cac0 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -20,11 +20,13 @@ import ( "time" "github.com/moov-io/base/admin" - moovhttp "github.com/moov-io/base/http" "github.com/moov-io/base/http/bind" "github.com/moov-io/base/log" 
"github.com/moov-io/watchman" + searchv2 "github.com/moov-io/watchman/internal/search" + "github.com/moov-io/watchman/pkg/ofac" + pubsearch "github.com/moov-io/watchman/pkg/search" "github.com/gorilla/mux" ) @@ -182,6 +184,10 @@ func main() { addSearchRoutes(logger, router, searcher) addValuesRoutes(logger, router, searcher) + genericSDNs := generalizeOFACSDNs(searcher.SDNs, searcher.Addresses) + v2SearchService := searchv2.NewService[ofac.SDN](logger, genericSDNs) + addSearchV2Routes(logger, router, v2SearchService) + // Setup our web UI to be served as well setupWebui(logger, router, *flagBasePath) @@ -263,3 +269,33 @@ func handleDownloadStats(updates chan *DownloadStats, handle func(stats *Downloa } } } + +func generalizeOFACSDNs(input []*SDN, ofacAddresses []*Address) []pubsearch.Entity[ofac.SDN] { + var out []pubsearch.Entity[ofac.SDN] + for _, sdn := range input { + if sdn.SDN == nil { + continue + } + + var addresses []ofac.Address + for _, ofacAddr := range ofacAddresses { + if ofacAddr.Address == nil { + continue + } + + if sdn.EntityID == ofacAddr.Address.EntityID { + addresses = append(addresses, *ofacAddr.Address) + } + } + + entity := ofac.ToEntity(*sdn.SDN, addresses, nil) + if len(entity.Addresses) > 0 && entity.Addresses[0].Line1 != "" { + out = append(out, entity) + } + } + return out +} + +func addSearchV2Routes(logger log.Logger, r *mux.Router, service searchv2.Service) { + searchv2.NewController(logger, service).AppendRoutes(r) +} diff --git a/internal/search/api_search.go b/internal/search/api_search.go index 0bf34117..038bc6f3 100644 --- a/internal/search/api_search.go +++ b/internal/search/api_search.go @@ -37,5 +37,5 @@ func (c *controller) AppendRoutes(router *mux.Router) *mux.Router { } func (c *controller) search(w http.ResponseWriter, r *http.Request) { - // TODO(adam): + c.service.Search(r.Context()) } diff --git a/internal/search/service.go b/internal/search/service.go index 25b13347..f822032f 100644 --- a/internal/search/service.go 
+++ b/internal/search/service.go @@ -1,5 +1,36 @@ package search +import ( + "context" + "encoding/json" + "fmt" + + "github.com/moov-io/base/log" + "github.com/moov-io/watchman/pkg/search" +) + type Service interface { - // TODO(adam): + Search(ctx context.Context) +} + +func NewService[T any](logger log.Logger, entities []search.Entity[T]) Service { + return &service[T]{ + logger: logger, + entities: entities, + } +} + +type service[T any] struct { + logger log.Logger + entities []search.Entity[T] +} + +func (s *service[T]) Search(ctx context.Context) { + for _, entity := range s.entities { + if len(entity.Addresses) > 0 { + bs, _ := json.Marshal(entity) + fmt.Printf("\n\n %s \n", string(bs)) + return + } + } } From a9752891d14cdafc0eb6dd0ad45b966a040b8391 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Thu, 19 Dec 2024 11:57:55 -0600 Subject: [PATCH 22/26] fix: add build tag when libpostal is linked / available --- Dockerfile | 2 +- Dockerfile-openshift | 2 +- makefile | 8 +++----- pkg/address/address.go | 12 ++++++++++++ .../{address_unix.go => address_libpostal.go} | 2 ++ 5 files changed, 19 insertions(+), 7 deletions(-) rename pkg/address/{address_unix.go => address_libpostal.go} (99%) diff --git a/Dockerfile b/Dockerfile index 67ac3cb5..cad50f21 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,7 +31,7 @@ RUN go mod download # Now copy the rest of the source code (frequently changes) COPY . /src/ WORKDIR /src -RUN VERSION=${VERSION} make build-server +RUN VERSION=${VERSION} GOTAGS="-tags libpostal" make build-server # Frontend build stage FROM node:22-bookworm as frontend diff --git a/Dockerfile-openshift b/Dockerfile-openshift index 0d52516c..b88fb6b7 100644 --- a/Dockerfile-openshift +++ b/Dockerfile-openshift @@ -59,7 +59,7 @@ RUN mkdir -p bin && \ chmod -R g=u . 
USER 1001 -RUN VERSION=${VERSION} make build-server +RUN VERSION=${VERSION} GOTAGS="-tags libpostal" make build-server # Stage 3: Frontend build FROM node:21-bookworm as frontend diff --git a/makefile b/makefile index 9a359c18..8a31b914 100644 --- a/makefile +++ b/makefile @@ -77,11 +77,9 @@ install-libpostal: ./configure $(CONFIGURE_FLAGS) && \ make -j$(shell nproc || echo 4) && \ if [ "$(detected_OS)" = "Windows" ]; then \ - make install && \ - make download-models; \ + make install; \ else \ - $(SUDO) make install && \ - $(SUDO) make download-models; \ + $(SUDO) make install; \ fi .PHONY: install install-linux install-macos install-windows install-libpostal @@ -94,7 +92,7 @@ else endif build-server: - go build -buildvcs=false -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/server github.com/moov-io/watchman/cmd/server + go build -buildvcs=false ${GOTAGS} -ldflags "-X github.com/moov-io/watchman.Version=${VERSION}" -o ./bin/server github.com/moov-io/watchman/cmd/server .PHONY: check check: diff --git a/pkg/address/address.go b/pkg/address/address.go index 855782dd..ba797359 100644 --- a/pkg/address/address.go +++ b/pkg/address/address.go @@ -1 +1,13 @@ +//go:build !libpostal + package address + +import ( + "github.com/moov-io/watchman/pkg/search" +) + +func ParseAddress(input string) search.Address { + var out search.Address + + return out +} diff --git a/pkg/address/address_unix.go b/pkg/address/address_libpostal.go similarity index 99% rename from pkg/address/address_unix.go rename to pkg/address/address_libpostal.go index 6cff2424..2439d390 100644 --- a/pkg/address/address_unix.go +++ b/pkg/address/address_libpostal.go @@ -1,3 +1,5 @@ +//go:build libpostal + package address import ( From e024efc0878a906488aff1c68eece49167a6418b Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Thu, 19 Dec 2024 12:03:21 -0600 Subject: [PATCH 23/26] build: remove "make install" from CI steps --- .github/workflows/codeql.yaml | 3 --- 
.github/workflows/fuzz.yml | 3 --- .github/workflows/go.yml | 9 --------- .github/workflows/openshift.yml | 3 --- .github/workflows/release.yml | 6 ------ 5 files changed, 24 deletions(-) diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml index 0f83fd27..9287a65b 100644 --- a/.github/workflows/codeql.yaml +++ b/.github/workflows/codeql.yaml @@ -15,9 +15,6 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - - name: Install deps - run: make install - - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index abf5b9a2..c074af37 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -25,9 +25,6 @@ jobs: with: fetch-depth: 0 - - name: Install deps - run: make install - - name: Fuzz run: | go test ./pkg/usaddress/... -fuzz Fuzz -fuzztime 10m diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index a571e2da..f56881ea 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -24,9 +24,6 @@ jobs: go-version: stable id: go - - name: Install deps - run: make install - - name: Run Tests (Linux) if: runner.os == 'Linux' run: make check @@ -56,9 +53,6 @@ jobs: run: | choco install -y make mingw - - name: Install deps - run: make install - - name: Run Short Tests (Non-Linux) run: | go test ./... 
-short @@ -91,9 +85,6 @@ jobs: sudo systemctl stop mono-xsp4.service || true sudo killall mono || true - - name: Install deps - run: make install - - name: Build Frontend run: make build diff --git a/.github/workflows/openshift.yml b/.github/workflows/openshift.yml index 03e2aa42..4004de38 100644 --- a/.github/workflows/openshift.yml +++ b/.github/workflows/openshift.yml @@ -29,9 +29,6 @@ jobs: - name: Check out code into the Go module directory uses: actions/checkout@v3 - - name: Install deps - run: make install - - name: Docker Build if: runner.os == 'Linux' run: make docker-openshift diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1ef59875..f844221d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -21,9 +21,6 @@ jobs: - name: Check out code into the Go module directory uses: actions/checkout@v4 - - name: Install deps - run: make install - - name: Short Tests if: runner.os == 'Linux' env: @@ -77,9 +74,6 @@ jobs: - name: Check out code into the Go module directory uses: actions/checkout@v4 - - name: Install deps - run: make install - - name: Load Release URL File from release job uses: actions/download-artifact@v4 with: From d1aa8107b96ee6b915c1e9cc658f2176f637b1b4 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Thu, 19 Dec 2024 12:05:23 -0600 Subject: [PATCH 24/26] meta: remove outdated file [skip ci] --- .codecov.yml | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 .codecov.yml diff --git a/.codecov.yml b/.codecov.yml deleted file mode 100644 index 37d7e635..00000000 --- a/.codecov.yml +++ /dev/null @@ -1,29 +0,0 @@ -codecov: - notify: - require_ci_to_pass: yes - -coverage: - precision: 2 - round: down - range: "70...100" - - status: - project: yes - patch: yes - changes: no - -parsers: - gcov: - branch_detection: - conditional: yes - loop: yes - method: no - macro: no - -comment: - layout: "header, diff" - behavior: default - require_changes: no - -ignore: - 
- "*Errors.go" # ignore error files From a093b3f30a6b7774c98bd8280da8ae63afc81ae0 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Thu, 19 Dec 2024 12:07:37 -0600 Subject: [PATCH 25/26] address: only run libpostal tests with the build tag provided --- pkg/address/address_libpostal_test.go | 72 +++++++++++++++++++++++++++ pkg/address/address_test.go | 71 +------------------------- 2 files changed, 74 insertions(+), 69 deletions(-) create mode 100644 pkg/address/address_libpostal_test.go diff --git a/pkg/address/address_libpostal_test.go b/pkg/address/address_libpostal_test.go new file mode 100644 index 00000000..d9a6215d --- /dev/null +++ b/pkg/address/address_libpostal_test.go @@ -0,0 +1,72 @@ +//go:build libpostal + +package address + +import ( + "fmt" + "testing" + + "github.com/moov-io/watchman/pkg/search" + + postal "github.com/openvenues/gopostal/parser" + "github.com/stretchr/testify/require" +) + +func TestParseAddress(t *testing.T) { + cases := []struct { + input string + expected search.Address + }{ + { + input: "101 Maple Street Apt 202 Bigcity, New York 11222", + expected: search.Address{ + Line1: "101 maple street", + Line2: "apt 202", + City: "bigcity", + PostalCode: "11222", + State: "new york", + }, + }, + } + for _, tc := range cases { + name := fmt.Sprintf("%#v", tc.expected) + + t.Run(name, func(t *testing.T) { + got := ParseAddress(tc.input) + require.Equal(t, tc.expected, got) + }) + } +} + +func TestOrganizeLibpostalComponents(t *testing.T) { + cases := []struct { + parts []postal.ParsedComponent + expected search.Address + }{ + { + parts: []postal.ParsedComponent{ + {Label: "house_number", Value: "101"}, + {Label: "road", Value: "Main Street"}, + {Label: "city", Value: "Springfield"}, + {Label: "state", Value: "Illinois"}, + {Label: "postcode", Value: "62704"}, + {Label: "country", Value: "United States"}, + }, + expected: search.Address{ + Line1: "101 Main Street", + City: "Springfield", + PostalCode: "62704", + State: "Illinois", + Country: 
"United States", + }, + }, + } + for _, tc := range cases { + name := fmt.Sprintf("%#v", tc.expected) + + t.Run(name, func(t *testing.T) { + got := organizeLibpostalComponents(tc.parts) + require.Equal(t, tc.expected, got) + }) + } +} diff --git a/pkg/address/address_test.go b/pkg/address/address_test.go index ab44397d..84a67683 100644 --- a/pkg/address/address_test.go +++ b/pkg/address/address_test.go @@ -1,70 +1,3 @@ -package address - -import ( - "fmt" - "testing" - - "github.com/moov-io/watchman/pkg/search" - - postal "github.com/openvenues/gopostal/parser" - "github.com/stretchr/testify/require" -) +//go:build !libpostal -func TestParseAddress(t *testing.T) { - cases := []struct { - input string - expected search.Address - }{ - { - input: "101 Maple Street Apt 202 Bigcity, New York 11222", - expected: search.Address{ - Line1: "101 maple street", - Line2: "apt 202", - City: "bigcity", - PostalCode: "11222", - State: "new york", - }, - }, - } - for _, tc := range cases { - name := fmt.Sprintf("%#v", tc.expected) - - t.Run(name, func(t *testing.T) { - got := ParseAddress(tc.input) - require.Equal(t, tc.expected, got) - }) - } -} - -func TestOrganizeLibpostalComponents(t *testing.T) { - cases := []struct { - parts []postal.ParsedComponent - expected search.Address - }{ - { - parts: []postal.ParsedComponent{ - {Label: "house_number", Value: "101"}, - {Label: "road", Value: "Main Street"}, - {Label: "city", Value: "Springfield"}, - {Label: "state", Value: "Illinois"}, - {Label: "postcode", Value: "62704"}, - {Label: "country", Value: "United States"}, - }, - expected: search.Address{ - Line1: "101 Main Street", - City: "Springfield", - PostalCode: "62704", - State: "Illinois", - Country: "United States", - }, - }, - } - for _, tc := range cases { - name := fmt.Sprintf("%#v", tc.expected) - - t.Run(name, func(t *testing.T) { - got := organizeLibpostalComponents(tc.parts) - require.Equal(t, tc.expected, got) - }) - } -} +package address From 
4caee017c7ecef849281c180eeb4a82831be7df1 Mon Sep 17 00:00:00 2001 From: Adam Shannon Date: Thu, 19 Dec 2024 12:14:28 -0600 Subject: [PATCH 26/26] meta: ignore printf for now --- internal/search/service.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/search/service.go b/internal/search/service.go index f822032f..57a53841 100644 --- a/internal/search/service.go +++ b/internal/search/service.go @@ -29,7 +29,7 @@ func (s *service[T]) Search(ctx context.Context) { for _, entity := range s.entities { if len(entity.Addresses) > 0 { bs, _ := json.Marshal(entity) - fmt.Printf("\n\n %s \n", string(bs)) + fmt.Printf("\n\n %s \n", string(bs)) //nolint:forbidigo return } }