-
Notifications
You must be signed in to change notification settings - Fork 1
/
DataImporter.cs
96 lines (76 loc) · 2.7 KB
/
DataImporter.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
using Microsoft.SemanticKernel;
namespace SKIngest;
/// <summary>
/// Loads resources from the registered data sources, runs the registered
/// transforms over them, and saves each resulting text resource into the
/// semantic kernel's memory.
/// </summary>
public class DataImporter
{
    private readonly List<ITransform> _Transforms = new();
    private readonly List<IDataSource> _Datasources = new();
    private readonly IKernel _SemanticKernel;

    /// <summary>
    /// Creates an importer that stores processed resources through the given kernel.
    /// </summary>
    /// <param name="sk">Kernel whose memory receives the processed resources.</param>
    /// <exception cref="ArgumentNullException"><paramref name="sk"/> is null.</exception>
    public DataImporter(IKernel sk)
    {
        _SemanticKernel = sk ?? throw new ArgumentNullException(nameof(sk));
    }

    /// <summary>
    /// Registers a data source to be loaded by <see cref="ProcessAsync"/>.
    /// </summary>
    /// <param name="dataSource">The data source to add.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dataSource"/> is null.</exception>
    public void AddDataSource(IDataSource dataSource)
    {
        // Fail here rather than with a NullReferenceException deep inside ProcessAsync.
        _Datasources.Add(dataSource ?? throw new ArgumentNullException(nameof(dataSource)));
    }

    /// <summary>
    /// Registers a transform to be run on every loaded resource.
    /// </summary>
    /// <param name="transform">The transform to add.</param>
    /// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
    public void AddTransform(ITransform transform)
    {
        _Transforms.Add(transform ?? throw new ArgumentNullException(nameof(transform)));
    }

    /// <summary>
    /// Loads every registered data source, runs the registered transforms on the
    /// text resources it yields, and saves each result into
    /// <paramref name="destinationCollection"/>. When no transforms are registered
    /// the loaded text resources are saved as-is.
    /// </summary>
    /// <param name="destinationCollection">Name of the memory collection to save into.</param>
    /// <exception cref="InvalidOperationException">No data source has been registered.</exception>
    public async Task ProcessAsync(string destinationCollection)
    {
        if (_Datasources.Count == 0)
        {
            throw new InvalidOperationException("Must have at least one data source defined before invoking run");
        }

        foreach (var ds in _Datasources)
        {
            //data sources may load into one or more resource instances
            var resources = await ds.Load();

            //only supporting text resources for now
            var inputResources = resources.OfType<TextResource>();

            List<TextResource> processedResources;
            if (_Transforms.Count == 0)
            {
                // OfType returns a lazy iterator, not a List, so an `as List<>` cast
                // would always produce null; materialize the sequence instead.
                processedResources = inputResources.ToList();
            }
            else
            {
                processedResources = new List<TextResource>();
                var resourceQueue = new Queue<Resource>(inputResources);

                while (resourceQueue.Count > 0)
                {
                    var resource = resourceQueue.Dequeue();
                    var state = await _RunTransforms(resource);

                    // NOTE(review): Cast throws InvalidCastException if a transform
                    // returns a non-text resource - confirm that is the intended contract.
                    processedResources.AddRange(state.Completed.Cast<TextResource>());

                    //anything pending was newly created, needs to go into the queue
                    foreach (var item in state.Pending)
                    {
                        resourceQueue.Enqueue(item);
                    }
                }
            }

            foreach (var resource in processedResources)
            {
                //once all transforms are complete, generate embeddings and store
                await _SemanticKernel.Memory.SaveInformationAsync(destinationCollection, resource.Value, resource.Id);
            }
        }
    }

    /// <summary>
    /// Runs every registered transform against <paramref name="resource"/> and
    /// collects all of their outputs as completed resources.
    /// </summary>
    /// <param name="resource">The resource handed to each transform.</param>
    /// <returns>A state object whose <c>Completed</c> list holds the transform outputs.</returns>
    private async Task<TransformState> _RunTransforms(Resource resource)
    {
        // NOTE(review): each transform receives the original resource (outputs are not
        // chained into the next transform) and Pending is never filled here - confirm
        // whether chaining / re-queueing was intended.
        var state = new TransformState();

        foreach (var tf in _Transforms)
        {
            state.Completed.AddRange(await tf.Run(resource));
        }

        return state;
    }

    /// <summary>
    /// Result of running the transforms over a single resource.
    /// </summary>
    private class TransformState
    {
        /// <summary>Resources produced by the transforms that are ready to be stored.</summary>
        public List<Resource> Completed { get; set; } = new();

        /// <summary>Resources that the caller re-queues for another transform pass.</summary>
        public Queue<Resource> Pending { get; set; } = new();
    }
}