Skip to content

Commit

Permalink
Merge branch 'master' into test-review-j-s
Browse files Browse the repository at this point in the history
  • Loading branch information
paulirwin committed Feb 15, 2024
2 parents 531797f + b1476ae commit 77cf172
Show file tree
Hide file tree
Showing 27 changed files with 556 additions and 48 deletions.
4 changes: 0 additions & 4 deletions .build/TestReferences.Common.targets
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,4 @@
<PackageReference Include="NUnit" Version="$(NUnitPackageVersion)" />
<PackageReference Include="NUnit3TestAdapter" Version="$(NUnit3TestAdapterPackageVersion)" />
</ItemGroup>

<ItemGroup Condition=" '$(TargetFramework)' == 'net461' ">
<PackageReference Include="System.Runtime.CompilerServices.Unsafe" Version="$(SystemRuntimeCompilerServicesUnsafePackageVersion)" />
</ItemGroup>
</Project>
4 changes: 2 additions & 2 deletions .build/azure-templates/publish-nuget-packages.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Licensed to the Apache Software Foundation (ASF) under one
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
Expand Down Expand Up @@ -57,7 +57,7 @@ steps:
inputs:
command: push
packagesToPush: '${{ parameters.nugetArtifactName }}/*.nupkg;!${{ parameters.nugetArtifactName }}/*.symbols.nupkg'
publishVstsFeed: '/${{ parameters.artifactFeedID }}'
publishVstsFeed: '${{ parameters.artifactFeedID }}'
allowPackageConflicts: true

- task: PublishSymbols@2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,28 @@ steps:
testResultsArtifactName: '${{ parameters.testResultsArtifactName }}'
testResultsFileName: '${{ parameters.testResultsFileName }}'

# Special case: Only supports net48
# Special case: Only supports net7.0, net6.0 and net48

- template: publish-test-results.yml
parameters:
testProjectName: 'Lucene.Net.Tests.Analysis.OpenNLP'
framework: 'net7.0' # Since conditions are not supported for templates, we check for the file existence within publish-test-results.yml
vsTestPlatform: '${{ parameters.vsTestPlatform }}'
osName: '${{ parameters.osName }}'
testResultsFormat: '${{ parameters.testResultsFormat }}'
testResultsArtifactName: '${{ parameters.testResultsArtifactName }}'
testResultsFileName: '${{ parameters.testResultsFileName }}'

- template: publish-test-results.yml
parameters:
testProjectName: 'Lucene.Net.Tests.Analysis.OpenNLP'
framework: 'net6.0' # Since conditions are not supported for templates, we check for the file existence within publish-test-results.yml
vsTestPlatform: '${{ parameters.vsTestPlatform }}'
osName: '${{ parameters.osName }}'
testResultsFormat: '${{ parameters.testResultsFormat }}'
testResultsArtifactName: '${{ parameters.testResultsArtifactName }}'
testResultsFileName: '${{ parameters.testResultsFileName }}'

- template: publish-test-results.yml
parameters:
testProjectName: 'Lucene.Net.Tests.Analysis.OpenNLP'
Expand Down
12 changes: 9 additions & 3 deletions .build/dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
<ICU4NLanguageDataPackageVersion>$(ICU4NPackageVersion)</ICU4NLanguageDataPackageVersion>
<ICU4NRegionDataPackageVersion>$(ICU4NPackageVersion)</ICU4NRegionDataPackageVersion>
<ICU4NTransliteratorPackageVersion>$(ICU4NPackageVersion)</ICU4NTransliteratorPackageVersion>
<IKVMPackageVersion>8.7.5</IKVMPackageVersion>
<IKVMMavenSdkPackageVersion>1.6.7</IKVMMavenSdkPackageVersion>
<J2NPackageVersion>2.0.0</J2NPackageVersion>
<LiquidTestReportsMarkdownPackageVersion>1.0.9</LiquidTestReportsMarkdownPackageVersion>
<MicrosoftAspNetCoreHttpAbstractionsPackageVersion>2.0.0</MicrosoftAspNetCoreHttpAbstractionsPackageVersion>
Expand Down Expand Up @@ -68,19 +70,23 @@
<NewtonsoftJsonPackageVersion>13.0.1</NewtonsoftJsonPackageVersion>
<NUnit3TestAdapterPackageVersion>3.17.0</NUnit3TestAdapterPackageVersion>
<NUnitPackageVersion>3.13.1</NUnitPackageVersion>
<OpenNLPNETPackageVersion>1.9.1.1</OpenNLPNETPackageVersion>
<RandomizedTestingGeneratorsPackageVersion>2.7.8</RandomizedTestingGeneratorsPackageVersion>
<SharpZipLibPackageVersion>1.4.2</SharpZipLibPackageVersion>
<Spatial4nPackageVersion>0.4.1.1</Spatial4nPackageVersion>
<SystemMemoryPackageVersion>4.5.4</SystemMemoryPackageVersion>
<SystemMemoryPackageVersion>4.5.5</SystemMemoryPackageVersion>
<SystemReflectionEmitPackageVersion>4.3.0</SystemReflectionEmitPackageVersion>
<SystemReflectionEmitILGenerationPackageVersion>4.3.0</SystemReflectionEmitILGenerationPackageVersion>
<SystemReflectionTypeExtensionsPackageVersion>4.3.0</SystemReflectionTypeExtensionsPackageVersion>
<SystemRuntimeCompilerServicesUnsafePackageVersion>5.0.0</SystemRuntimeCompilerServicesUnsafePackageVersion>
<SystemRuntimeCompilerServicesUnsafePackageVersion>6.0.0</SystemRuntimeCompilerServicesUnsafePackageVersion>
<SystemRuntimeInteropServicesRuntimeInformationPackageVersion>4.3.0</SystemRuntimeInteropServicesRuntimeInformationPackageVersion>
<SystemSecurityCryptographyXmlPackageVersion>6.0.1</SystemSecurityCryptographyXmlPackageVersion>
<SystemTextEncodingCodePagesPackageVersion>4.3.0</SystemTextEncodingCodePagesPackageVersion>
<SystemTextEncodingCodePagesPackageVersion Condition=" '$(TargetFramework)' == 'net461' ">5.0.0</SystemTextEncodingCodePagesPackageVersion>
<SystemTextJsonPackageVersion>6.0.6</SystemTextJsonPackageVersion>
<TimeZoneConverterPackageVersion>6.1.0</TimeZoneConverterPackageVersion>
</PropertyGroup>
<PropertyGroup Label="Maven Package Reference Versions">
<OpenNLPToolsMavenReferenceVersion>1.9.1</OpenNLPToolsMavenReferenceVersion>
<OSGICoreMavenReferenceVersion>4.2.0</OSGICoreMavenReferenceVersion>
</PropertyGroup>
</Project>
2 changes: 1 addition & 1 deletion .github/workflows/Lucene-Net-Tests-Analysis-OpenNLP.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ jobs:
fail-fast: false
matrix:
os: [windows-latest, ubuntu-latest]
framework: [net48]
framework: [net7.0, net48]
platform: [x64]
configuration: [Release]
exclude:
Expand Down
7 changes: 7 additions & 0 deletions .github/workflows/Lucene-Net-Website.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ jobs:
run: ./main-repo/websites/site/site.ps1 -Clean
shell: powershell

- name: Upload website as build artifact
uses: actions/upload-artifact@v3
if: ${{always()}}
with:
name: 'website'
path: '${{github.workspace}}/main-repo/websites/site/_site'

- name: Checkout Lucene.Net website
uses: actions/checkout@v3
with:
Expand Down
15 changes: 13 additions & 2 deletions .github/workflows/sonar.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@ on:
push:
branches:
- master
paths: # Exclude anything that isn't the main solution (docs, images, website, etc.)
- '.github/workflows/sonar.yml'
- 'src/**'
- '.build/dependencies.props'
- '.build/TestReferences.Common.*'
- '**/TestTargetFramework.*'
- '*.sln'
- '**/Directory.Build.*'
- '!src/docs/**'
- '!**/*.md'
- '!**/*.txt'
schedule:
- cron: '36 12 * * *' # 12:36 PM UTC, daily (picked an odd start time to try to avoid competing for agents with other projects)
jobs:
Expand All @@ -34,10 +45,10 @@ jobs:
echo "DOTNET_NOLOGO=1" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
echo "DOTNET_CLI_TELEMETRY_OPTOUT=1" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
shell: pwsh
- name: Setup .NET 7 SDK
- name: Setup .NET 8 SDK
uses: actions/setup-dotnet@v3
with:
dotnet-version: '7.0.x'
dotnet-version: '8.0.x'
- name: Set up JDK 21
uses: actions/setup-java@v3
with:
Expand Down
4 changes: 2 additions & 2 deletions Directory.Build.targets
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@
<DebugType>full</DebugType>
</PropertyGroup>

<!-- Features in .NET Framework 4.5+ but not in .NET Standard 2.0 -->
<!-- Features in .NET Framework 4.5+ and .NET 6.0+ but not in .NET Standard 2.0 or .NET Standard 2.1 -->
<!-- net461 is used to test .NET Standard 2.0, so we treat it like it is not part of this group -->
<PropertyGroup Condition="$(TargetFramework.StartsWith('net4')) And '$(TargetFramework)' != 'net461'">
<PropertyGroup Condition=" ($(TargetFramework.StartsWith('net4')) And '$(TargetFramework)' != 'net461') Or $(TargetFramework.StartsWith('net6.')) Or $(TargetFramework.StartsWith('net7.'))">

<DefineConstants>$(DefineConstants);FEATURE_OPENNLP</DefineConstants>

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Lucene version compatibility level 8.2.0
// LUCENENET NOTE: Ported because Lucene.Net.Analysis.OpenNLP requires this to be useful.
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;
#nullable enable

namespace Lucene.Net.Analysis.Miscellaneous
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/// <summary>
/// Token filter that, after each token produced by the wrapped stream, emits one extra
/// token whose text is that token's <see cref="ITypeAttribute.Type"/> (optionally
/// preceded by a fixed prefix) and whose position increment is zero, so the type
/// appears as a synonym stacked on the original token.
/// </summary>
public sealed class TypeAsSynonymFilter : TokenFilter
{
    private readonly ICharTermAttribute termAtt;
    private readonly ITypeAttribute typeAtt;
    private readonly IPositionIncrementAttribute posIncrAtt;
    private readonly string? prefix;

    // Captured state of the most recently returned input token; non-null means
    // its type synonym still has to be emitted on the next call.
    private State? pendingState;

    /// <summary>
    /// Creates a <see cref="TypeAsSynonymFilter"/> over <paramref name="input"/>
    /// that emits token types without any prefix.
    /// </summary>
    /// <param name="input">Input token stream.</param>
    public TypeAsSynonymFilter(TokenStream input)
        : this(input, prefix: null)
    {
    }

    /// <summary>
    /// Creates a <see cref="TypeAsSynonymFilter"/> over <paramref name="input"/>
    /// that prepends <paramref name="prefix"/> to every emitted token type.
    /// </summary>
    /// <param name="input">Input token stream.</param>
    /// <param name="prefix">String placed in front of every token type emitted as token text.
    /// When <c>null</c>, the type is emitted as-is.</param>
    public TypeAsSynonymFilter(TokenStream input, string? prefix)
        : base(input)
    {
        termAtt = AddAttribute<ICharTermAttribute>();
        typeAtt = AddAttribute<ITypeAttribute>();
        posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
        this.prefix = prefix;
    }

    /// <summary>
    /// Advances the stream. Calls alternate between passing through the next input
    /// token and emitting that token's type as a synonym at the same position.
    /// </summary>
    /// <returns><c>true</c> if a token was produced; <c>false</c> once the input is
    /// exhausted and no synonym is pending.</returns>
    public override bool IncrementToken()
    {
        if (pendingState is null)
        {
            // No pending token type to emit: pull the next token from the input.
            if (!m_input.IncrementToken())
            {
                return false;
            }

            // Remember the token so its type can be emitted on the following call.
            pendingState = CaptureState();
            return true;
        }

        // Emit the previous token's type at the same position.
        RestoreState(pendingState);
        pendingState = null;

        termAtt.SetEmpty();
        if (prefix != null)
        {
            termAtt.Append(prefix);
        }
        termAtt.Append(typeAtt.Type);

        // Zero increment stacks the synonym on top of the original token.
        posIncrAtt.PositionIncrement = 0;
        return true;
    }

    /// <summary>
    /// Resets the filter and discards any synonym that was still waiting to be emitted.
    /// </summary>
    public override void Reset()
    {
        base.Reset();
        pendingState = null;
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Lucene version compatibility level 8.2.0
// LUCENENET NOTE: Ported because Lucene.Net.Analysis.OpenNLP requires this to be useful.
using Lucene.Net.Analysis.Util;
using System;
using System.Collections.Generic;
#nullable enable

namespace Lucene.Net.Analysis.Miscellaneous
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/// <summary>
/// Factory for <see cref="TypeAsSynonymFilter"/>.
/// <code>
/// &lt;fieldType name="text_type_as_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
/// &lt;analyzer&gt;
/// &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory"/&gt;
/// &lt;filter class="solr.TypeAsSynonymFilterFactory" prefix="_type_" /&gt;
/// &lt;/analyzer&gt;
/// &lt;/fieldType&gt;
/// </code>
///
/// <para/>
/// If the optional <c>prefix</c> parameter is used, the specified value will be prepended
/// to the type, e.g. with prefix = "_type_", for a token "example.com" with type "&lt;URL&gt;",
/// the emitted synonym will have text "_type_&lt;URL&gt;".
/// </summary>
public class TypeAsSynonymFilterFactory : TokenFilterFactory
{
    // Optional prefix handed to every filter created by this factory.
    // Declared nullable because the "prefix" argument is optional and
    // the value is passed straight to a string? constructor parameter.
    private readonly string? prefix;

    /// <summary>
    /// Creates a new <see cref="TypeAsSynonymFilterFactory"/>.
    /// </summary>
    /// <param name="args">Factory arguments. The only argument read by this class is the
    /// optional <c>prefix</c>.</param>
    /// <exception cref="ArgumentException">Thrown if <paramref name="args"/> is not empty
    /// after the known arguments have been read.</exception>
    public TypeAsSynonymFilterFactory(IDictionary<string, string> args)
        : base(args)
    {
        prefix = Get(args, "prefix"); // default value is null

        // NOTE(review): this check presumes Get (and the base constructor) remove the
        // entries they consume from args, so anything left over is unrecognized.
        if (args.Count > 0)
        {
            throw new ArgumentException(string.Format(J2N.Text.StringFormatter.CurrentCulture, "Unknown parameters: {0}", args));
        }
    }

    /// <summary>
    /// Wraps <paramref name="input"/> in a <see cref="TypeAsSynonymFilter"/> configured
    /// with this factory's prefix.
    /// </summary>
    /// <param name="input">Input token stream.</param>
    /// <returns>A new <see cref="TypeAsSynonymFilter"/> over <paramref name="input"/>.</returns>
    public override TokenStream Create(TokenStream input)
    {
        return new TypeAsSynonymFilter(input, prefix);
    }
}
}
15 changes: 11 additions & 4 deletions src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@
<Import Project="$(SolutionDir).build/nuget.props" />

<PropertyGroup>
<TargetFramework>net462</TargetFramework>
<!-- Currently, IKVM doesn't officially support building NetFX on anything but Windows, so we skip it for contributors who may be on various platforms.
We can remove the condition once that has been addressed. See: https://github.com/ikvmnet/ikvm-maven/issues/49 -->
<TargetFrameworks>net6.0</TargetFrameworks>
<TargetFrameworks Condition="$([MSBuild]::IsOsPlatform('Windows'))">$(TargetFrameworks);net472</TargetFrameworks>

<AssemblyTitle>Lucene.Net.Analysis.OpenNLP</AssemblyTitle>
<PackageTags>$(PackageTags);analysis;natural;language;processing;opennlp</PackageTags>
Expand All @@ -39,8 +42,6 @@
<RootNamespace>Lucene.Net.Analysis.OpenNlp</RootNamespace>
</PropertyGroup>



<ItemGroup>
<ProjectReference Include="..\dotnet\Lucene.Net.ICU\Lucene.Net.ICU.csproj" />
<ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj" />
Expand All @@ -49,7 +50,13 @@

<ItemGroup>
<PackageReference Include="ICU4N" Version="$(ICU4NPackageVersion)" />
<PackageReference Include="OpenNLP.NET" Version="$(OpenNLPNETPackageVersion)" />
<PackageReference Include="IKVM" Version="$(IKVMPackageVersion)" />
<PackageReference Include="IKVM.Maven.Sdk" Version="$(IKVMMavenSdkPackageVersion)" />
</ItemGroup>

<ItemGroup>
<MavenReference Include="org.apache.opennlp:opennlp-tools" Version="$(OpenNLPToolsMavenReferenceVersion)" />
<MavenReference Include="org.osgi:org.osgi.core" Version="$(OSGICoreMavenReferenceVersion)" />
</ItemGroup>

</Project>
7 changes: 7 additions & 0 deletions src/Lucene.Net.Analysis.OpenNLP/OpenNLPChunkerFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ namespace Lucene.Net.Analysis.OpenNlp
/// <summary>
/// Run OpenNLP chunker. Prerequisite: the <see cref="OpenNLPTokenizer"/> and <see cref="OpenNLPPOSFilter"/> must precede this filter.
/// Tags terms in the <see cref="ITypeAttribute"/>, replacing the POS tags previously put there by <see cref="OpenNLPPOSFilter"/>.
/// <para/>
/// The <see cref="Lucene.Net.Analysis.Payloads.TypeAsPayloadTokenFilter"/> can be used to copy the POS tag values to
/// <see cref="Lucene.Net.Analysis.TokenAttributes.IPayloadAttribute"/>, which will index the value. Alternatively, the
/// <see cref="Lucene.Net.Analysis.Miscellaneous.TypeAsSynonymFilter"/> creates a cloned token at the same position as
/// each tagged token, and copies the <see cref="Lucene.Net.Analysis.TokenAttributes.ITypeAttribute"/> value to the
/// <see cref="Lucene.Net.Analysis.TokenAttributes.ICharTermAttribute"/>, optionally with a customized prefix
/// (so that tags effectively occupy a different namespace from token text).
/// </summary>
public sealed class OpenNLPChunkerFilter : TokenFilter
{
Expand Down
7 changes: 7 additions & 0 deletions src/Lucene.Net.Analysis.OpenNLP/OpenNLPPOSFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ namespace Lucene.Net.Analysis.OpenNlp

/// <summary>
/// Run OpenNLP POS tagger. Tags all terms in the <see cref="ITypeAttribute"/>.
/// <para/>
/// The <see cref="Lucene.Net.Analysis.Payloads.TypeAsPayloadTokenFilter"/> can be used to copy the POS tag values to
/// <see cref="Lucene.Net.Analysis.TokenAttributes.IPayloadAttribute"/>, which will index the value. Alternatively, the
/// <see cref="Lucene.Net.Analysis.Miscellaneous.TypeAsSynonymFilter"/> creates a cloned token at the same position as
/// each tagged token, and copies the <see cref="Lucene.Net.Analysis.TokenAttributes.ITypeAttribute"/> value to the
/// <see cref="Lucene.Net.Analysis.TokenAttributes.ICharTermAttribute"/>, optionally with a customized prefix
/// (so that tags effectively occupy a different namespace from token text).
/// </summary>
public sealed class OpenNLPPOSFilter : TokenFilter
{
Expand Down
Loading

0 comments on commit 77cf172

Please sign in to comment.