Skip to content

Commit

Permalink
feat/SSM-30: introduced lp1h doc parsers & types (#32)
Browse files Browse the repository at this point in the history
* feat/SSM-30: introduced lp1h doc parsers & types

* feat/SSM-30: refactored registry to enable better transparency on document handling.

* feat/SSM-30: improved test coverage for job queue.

* feat/SSM-30: improved validation behaviours.

* feat/SSM-30: added validation logic for LP1H and bug fixes.

* feat/SSM-30: change name of validate section function

* feat/SSM-30: naming convention change for commonValidators.
  • Loading branch information
j1mb0b authored Jan 23, 2025
1 parent c3a4849 commit 04574a1
Show file tree
Hide file tree
Showing 25 changed files with 1,021 additions and 536 deletions.
17 changes: 15 additions & 2 deletions service-app/internal/aws/job_queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,27 @@ var scannedCaseResponse = &types.ScannedCaseResponse{
const fileName = "SET_DDC_20250106093401__LPA_677ba389ab101.xml"

// TestAwsQueue_PHPSerialization checks that the message produced by
// createMessageBody marshals to the expected JSON envelope: a "content"
// string holding the serialized PHP payload and a "metadata" object.
func TestAwsQueue_PHPSerialization(t *testing.T) {
// Generate the message body
message := createMessageBody(scannedCaseResponse, fileName)

messageJson, err := json.Marshal(message)
assert.NoError(t, err, "Failed to marshal message to JSON")

// Expected JSON envelope for the fixture uid and file name.
expectedOutput := `{"content":"a:2:{s:3:\"uid\";s:12:\"700000001219\";s:8:\"filename\";s:45:\"SET_DDC_20250106093401__LPA_677ba389ab101.xml\";}","metadata":{"__name__":"Ddc\\Job\\FormJob"}}`
assert.JSONEq(t, expectedOutput, string(messageJson), "The serialized message does not match the expected output.")

// Decode both documents into generic maps so individual fields can be
// compared separately.
var actual map[string]interface{}
var expected map[string]interface{}

err = json.Unmarshal(messageJson, &actual)
assert.NoError(t, err)

err = json.Unmarshal([]byte(expectedOutput), &expected)
assert.NoError(t, err)

assert.Equal(t, expected["metadata"], actual["metadata"])
// The unchecked type assertions panic if "content" is absent or is not a
// string in either document.
expectedContent := expected["content"].(string)
actualContent := actual["content"].(string)

assert.Equal(t, expectedContent, actualContent, "The serialized PHP content does not match the expected output.")
}

func TestAwsQueue_QueueSetForProcessing(t *testing.T) {
Expand Down
7 changes: 7 additions & 0 deletions service-app/internal/constants/document_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const (
DocumentTypeLP1F = "LP1F"
DocumentTypeLP1H = "LP1H"
DocumentTypeLP2 = "LP2"
DocumentCorresp = "Correspondence"
)

const (
Expand All @@ -22,6 +23,12 @@ const (
)

var (
SupprotedDocumentTypes = []string{
DocumentTypeLP1F,
DocumentTypeLP1H,
DocumentCorresp,
}

LPATypeDocuments = []string{
DocumentTypeLPA002,
DocumentTypeLP1F,
Expand Down
48 changes: 48 additions & 0 deletions service-app/internal/factory/document_component.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package factory

import (
"fmt"

"github.com/ministryofjustice/opg-scanning/internal/parser"
"github.com/ministryofjustice/opg-scanning/internal/parser/corresp_parser"
"github.com/ministryofjustice/opg-scanning/internal/parser/lp1f_parser"
"github.com/ministryofjustice/opg-scanning/internal/parser/lp1h_parser"
)

// Component defines a registry entry for a document type. It groups the
// parse function, validator and sanitizer that GetComponent returns together
// for a single document type.
type Component struct {
// Parser takes a raw byte payload and returns the parsed document or an error.
Parser func([]byte) (interface{}, error)
// Validator is the validator supplied by the document type's parser package.
Validator parser.CommonValidator
// Sanitizer is the sanitizer supplied by the document type's parser package.
Sanitizer parser.CommonSanitizer
}

// GetComponent builds the Component (parser, validator and sanitizer) for the
// given document type.
//
// The recognised docType values are "LP1H", "LP1F" and "Correspondence". Any
// other value yields a zero Component together with an "unsupported docType"
// error.
func GetComponent(docType string) (Component, error) {
	var entry Component

	switch docType {
	case "LP1H":
		entry.Parser = func(raw []byte) (interface{}, error) {
			return lp1h_parser.Parse(raw)
		}
		entry.Validator = lp1h_parser.NewValidator()
		entry.Sanitizer = lp1h_parser.NewSanitizer()
	case "LP1F":
		entry.Parser = func(raw []byte) (interface{}, error) {
			return lp1f_parser.Parse(raw)
		}
		entry.Validator = lp1f_parser.NewValidator()
		entry.Sanitizer = lp1f_parser.NewSanitizer()
	case "Correspondence":
		entry.Parser = func(raw []byte) (interface{}, error) {
			return corresp_parser.Parse(raw)
		}
		entry.Validator = corresp_parser.NewValidator()
		entry.Sanitizer = corresp_parser.NewSanitizer()
	default:
		// Unknown types never produce a partially populated component.
		return Component{}, fmt.Errorf("unsupported docType: %s", docType)
	}

	return entry, nil
}
12 changes: 7 additions & 5 deletions service-app/internal/factory/document_factory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
"github.com/ministryofjustice/opg-scanning/config"
"github.com/ministryofjustice/opg-scanning/internal/logger"
"github.com/ministryofjustice/opg-scanning/internal/types"
"github.com/ministryofjustice/opg-scanning/internal/types/lpf1_types"
"github.com/ministryofjustice/opg-scanning/internal/types/lp1f_types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand All @@ -29,7 +29,9 @@ func TestProcessDocument_LP1F(t *testing.T) {
}

// Create a new DocumentProcessor using the factory
registry := NewRegistry()
registry, err := NewRegistry()
require.NoError(t, err, "Failed create Registry")

cfg := config.NewConfig()
logger := logger.NewLogger(cfg)
processor, err := NewDocumentProcessor(doc, doc.Type, "XML", registry, logger)
Expand All @@ -39,15 +41,15 @@ func TestProcessDocument_LP1F(t *testing.T) {
processedDoc, err := processor.Process()
require.NoError(t, err, "Document processing failed")

lp1fDoc, ok := processedDoc.(*lpf1_types.LP1FDocument)
require.True(t, ok, "expected processedDoc to be of type *lp1f_types.LP1FDocument")
lp1fDoc, ok := processedDoc.(*lp1f_types.LP1FDocument)
require.True(t, ok, "Expected processedDoc to be of type *lp1f_types.LP1FDocument")

assert.Equal(t, "ANDREW ROBERT", lp1fDoc.Page1.Section1.FirstName, "FirstName mismatch")
assert.Equal(t, "HEPBURN", lp1fDoc.Page1.Section1.LastName, "LastName mismatch")
}

func loadXMLFile(t *testing.T, filepath string) string {
data, err := os.ReadFile(filepath)
require.NoError(t, err, "failed to read XML file")
require.NoError(t, err, "Failed to read XML file")
return base64.StdEncoding.EncodeToString(data)
}
45 changes: 18 additions & 27 deletions service-app/internal/factory/document_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,10 @@ package factory
import (
"fmt"

"github.com/ministryofjustice/opg-scanning/internal/constants"
"github.com/ministryofjustice/opg-scanning/internal/parser"
"github.com/ministryofjustice/opg-scanning/internal/parser/corresp_parser"
"github.com/ministryofjustice/opg-scanning/internal/parser/lp1f_parser"
)

// Component defines a registry entry for a document type.
type Component struct {
Parser func([]byte) (interface{}, error)
Validator parser.CommonValidator
Sanitizer parser.CommonSanitizer
}

// Defines the behavior for a document registry.
type RegistryInterface interface {
GetParser(docType string) (func([]byte) (interface{}, error), error)
Expand All @@ -28,25 +20,24 @@ type Registry struct {
}

// Initializes the registry with doc type handlers.
func NewRegistry() *Registry {
return &Registry{
components: map[string]Component{
"LP1F": {
Parser: func(data []byte) (interface{}, error) {
return lp1f_parser.Parse(data)
},
Validator: lp1f_parser.NewValidator(),
Sanitizer: lp1f_parser.NewSanitizer(),
},
"Correspondence": {
Parser: func(data []byte) (interface{}, error) {
return corresp_parser.Parse(data)
},
Validator: corresp_parser.NewValidator(),
Sanitizer: corresp_parser.NewSanitizer(),
},
},
func NewRegistry() (*Registry, error) {
components := make(map[string]Component)

// List of supported document types
docTypes := constants.SupprotedDocumentTypes

// Populate the registry using the utility function
for _, docType := range docTypes {
component, err := GetComponent(docType)
if err != nil {
return nil, fmt.Errorf("error getting component for %s: %v", docType, err)
}
components[docType] = component
}

return &Registry{
components: components,
}, nil
}

func (r *Registry) GetParser(docType string) (func([]byte) (interface{}, error), error) {
Expand Down
7 changes: 6 additions & 1 deletion service-app/internal/ingestion/job_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,12 @@ func (q *JobQueue) StartWorkerPool(ctx context.Context, numWorkers int) {
defer close(done)

// Initialize document processor
registry := factory.NewRegistry()
registry, err := factory.NewRegistry()
if err != nil {
q.logger.Error("Worker %d failed to create registry, job: %v\n", nil, workerID, err)
return
}

processor, err := factory.NewDocumentProcessor(job.Data, job.Data.Type, job.format, registry, q.logger)
if err != nil {
q.logger.Error("Worker %d failed to initialize processor for job: %v\n", nil, workerID, err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,19 @@ import (
"github.com/ministryofjustice/opg-scanning/internal/types"
)

// DocumentParser decodes the XML in data into doc and then runs
// ValidateStruct on the populated value.
//
// doc is passed straight to xml.Unmarshal, so it is expected to be a pointer
// to the target struct. On success the same doc value is returned. If
// decoding or validation fails, the result is nil and the underlying error is
// returned unchanged.
func DocumentParser(data []byte, doc interface{}) (interface{}, error) {
	decodeErr := xml.Unmarshal(data, doc)
	if decodeErr != nil {
		return nil, decodeErr
	}

	// Enforce the required-field rules declared through struct tags.
	validationErr := ValidateStruct(doc)
	if validationErr != nil {
		return nil, validationErr
	}

	return doc, nil
}

func BaseParserXml(data []byte) (*types.BaseSet, error) {
var parsed types.BaseSet
if err := xml.Unmarshal(data, &parsed); err != nil {
Expand All @@ -17,7 +30,6 @@ func BaseParserXml(data []byte) (*types.BaseSet, error) {
return &parsed, nil
}

// TODO: Check if this is needed due to manual validation overrides.
// ValidateStruct checks if the provided struct or its nested structs
// have all fields marked with the "required" tag present and non-empty.
// It supports pointer dereferencing and recursive validation for nested structs.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ import (
)

type Sanitizer struct {
doc *corresp_types.Correspondence
commonSanitizer *parser.Sanitizer
doc *corresp_types.Correspondence
baseSanitizer *parser.BaseSanitizer
}

func NewSanitizer() *Sanitizer {
Expand All @@ -24,13 +24,13 @@ func (v *Sanitizer) Setup(doc interface{}) error {
}

v.doc = doc.(*corresp_types.Correspondence)
v.commonSanitizer = parser.NewSanitizer(v.doc)
v.baseSanitizer = parser.NewBaseSanitizer(v.doc)

return nil
}

func (s *Sanitizer) Sanitize() (interface{}, error) {
if err := s.commonSanitizer.SanitizeStruct(s.doc); err != nil {
if err := s.baseSanitizer.SanitizeStruct(s.doc); err != nil {
return nil, err
}

Expand Down
10 changes: 5 additions & 5 deletions service-app/internal/parser/corresp_parser/corresp_validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ import (
)

type Validator struct {
doc *corresp_types.Correspondence
commonValidator *parser.Validator
doc *corresp_types.Correspondence
baseValidator *parser.BaseValidator
}

func NewValidator() *Validator {
Expand All @@ -24,17 +24,17 @@ func (v *Validator) Setup(doc interface{}) error {
}

v.doc = doc.(*corresp_types.Correspondence)
v.commonValidator = parser.NewValidator(v.doc)
v.baseValidator = parser.NewBaseValidator(v.doc)

return nil
}

func (v *Validator) Validate() error {
// Common witness validations
v.commonValidator.WitnessSignatureFullNameAddressValidator("Page10", "Section9")
v.baseValidator.WitnessSignatureFullNameAddressValidator("Page10", "Section9")

// Return errors if any
if messages := v.commonValidator.GetValidatorErrorMessages(); len(messages) > 0 {
if messages := v.baseValidator.GetValidatorErrorMessages(); len(messages) > 0 {
return fmt.Errorf("failed to validate LP1F document: %v", messages)
}
return nil
Expand Down
17 changes: 3 additions & 14 deletions service-app/internal/parser/lp1f_parser/lp1f_parser.go
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
package lp1f_parser

import (
"encoding/xml"

"github.com/ministryofjustice/opg-scanning/internal/parser"
lp1f_types "github.com/ministryofjustice/opg-scanning/internal/types/lpf1_types"
lp1f_types "github.com/ministryofjustice/opg-scanning/internal/types/lp1f_types"
)

func Parse(data []byte) (*lp1f_types.LP1FDocument, error) {
func Parse(data []byte) (interface{}, error) {
doc := &lp1f_types.LP1FDocument{}
if err := xml.Unmarshal(data, doc); err != nil {
return nil, err
}

// Validate required fields based on struct tags
if err := parser.ValidateStruct(doc); err != nil {
return nil, err
}

return doc, nil
return parser.DocumentParser(data, doc)
}
13 changes: 6 additions & 7 deletions service-app/internal/parser/lp1f_parser/lp1f_sanitizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@ import (
"fmt"

"github.com/ministryofjustice/opg-scanning/internal/parser"
"github.com/ministryofjustice/opg-scanning/internal/types/lpf1_types"
lp1f_types "github.com/ministryofjustice/opg-scanning/internal/types/lpf1_types"
"github.com/ministryofjustice/opg-scanning/internal/types/lp1f_types"
)

type Sanitizer struct {
doc *lp1f_types.LP1FDocument
commonSanitizer *parser.Sanitizer
doc *lp1f_types.LP1FDocument
baseSanitizer *parser.BaseSanitizer
}

func NewSanitizer() *Sanitizer {
Expand All @@ -24,15 +23,15 @@ func (v *Sanitizer) Setup(doc interface{}) error {
return fmt.Errorf("document is nil")
}

v.doc = doc.(*lpf1_types.LP1FDocument)
v.commonSanitizer = parser.NewSanitizer(v.doc)
v.doc = doc.(*lp1f_types.LP1FDocument)
v.baseSanitizer = parser.NewBaseSanitizer(v.doc)

return nil
}

func (s *Sanitizer) Sanitize() (interface{}, error) {
// Sanitize the entire struct dynamically
if err := s.commonSanitizer.SanitizeStruct(s.doc); err != nil {
if err := s.baseSanitizer.SanitizeStruct(s.doc); err != nil {
return nil, err
}

Expand Down
Loading

0 comments on commit 04574a1

Please sign in to comment.