-
Notifications
You must be signed in to change notification settings - Fork 100
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a356b9f
commit ff86c62
Showing
44 changed files
with
2,417 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/* | ||
* Open Chinese Convert | ||
* | ||
* Copyright 2010-2014 Carbo Kuo <[email protected]> | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include "Common.hpp" | ||
#include "SerializableDict.hpp" | ||
|
||
namespace opencc { | ||
/** | ||
* Binary dictionary for faster deserialization | ||
* @ingroup opencc_cpp_api | ||
*/ | ||
class OPENCC_EXPORT BinaryDict : public SerializableDict { | ||
public: | ||
// Wraps an existing lexicon; the shared pointer is copied into `lexicon`. | ||
BinaryDict(const LexiconPtr& _lexicon) : lexicon(_lexicon) {} | ||
|
||
virtual ~BinaryDict() {} | ||
|
||
// Serializes this dictionary to the given C stream (defined out of line). | ||
virtual void SerializeToFile(FILE* fp) const; | ||
|
||
// Factory counterpart of SerializeToFile: builds a BinaryDict from `fp`. | ||
// NOTE(review): BinaryDictPtr is only typedef'd under ENABLE_DARTS in the | ||
// forward-declaration header -- confirm this header is never included | ||
// without that macro defined. | ||
static BinaryDictPtr NewFromFile(FILE* fp); | ||
|
||
// Returns a reference to the stored lexicon pointer (no copy is made). | ||
const LexiconPtr& GetLexicon() const { return lexicon; } | ||
|
||
// Declared here, defined out of line. | ||
// Presumably the longest key length in the lexicon -- TODO confirm. | ||
size_t KeyMaxLength() const; | ||
|
||
private: | ||
LexiconPtr lexicon; | ||
std::string keyBuffer; | ||
std::string valueBuffer; | ||
|
||
// Fills the caller-supplied buffers, offset tables and total lengths. | ||
// The `keyBuffer` and `valueBuffer` parameters shadow the data members of | ||
// the same name; because the method is const, results go through the | ||
// reference parameters rather than the members. | ||
void ConstructBuffer(std::string& keyBuffer, std::vector<size_t>& keyOffset, | ||
size_t& keyTotalLength, std::string& valueBuffer, | ||
std::vector<size_t>& valueOffset, | ||
size_t& valueTotalLength) const; | ||
}; | ||
} // namespace opencc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Open Chinese Convert | ||
* | ||
* Copyright 2010-2014 Carbo Kuo <[email protected]> | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
// Microsoft Visual C++ specific | ||
#if defined(_MSC_VER) && (_MSC_VER >= 1020) | ||
#pragma warning(disable : 4251 4266 4350 4503 4512 4514 4710 4820) | ||
#endif | ||
|
||
#include <cstddef> | ||
#include <memory> | ||
#include <string> | ||
#include <vector> | ||
|
||
#include "Export.hpp" | ||
#include "Optional.hpp" | ||
|
||
// Forward declarations and aliases | ||
namespace opencc { | ||
// Incomplete class declarations so that headers can name these types | ||
// (e.g. inside shared_ptr aliases) without including their definitions. | ||
class Config; | ||
class Conversion; | ||
class ConversionChain; | ||
class Converter; | ||
class Dict; | ||
class DictEntry; | ||
class DictGroup; | ||
class Lexicon; | ||
class MarisaDict; | ||
class MultiValueDictEntry; | ||
class NoValueDictEntry; | ||
class Segmentation; | ||
class Segments; | ||
class SerializableDict; | ||
class SingleValueDictEntry; | ||
class TextDict; | ||
// Shared-ownership aliases named <Type>Ptr. Config and the *DictEntry | ||
// classes have no alias declared here. | ||
typedef std::shared_ptr<Conversion> ConversionPtr; | ||
typedef std::shared_ptr<ConversionChain> ConversionChainPtr; | ||
typedef std::shared_ptr<Converter> ConverterPtr; | ||
typedef std::shared_ptr<Dict> DictPtr; | ||
typedef std::shared_ptr<DictGroup> DictGroupPtr; | ||
typedef std::shared_ptr<Lexicon> LexiconPtr; | ||
typedef std::shared_ptr<MarisaDict> MarisaDictPtr; | ||
typedef std::shared_ptr<Segmentation> SegmentationPtr; | ||
typedef std::shared_ptr<Segments> SegmentsPtr; | ||
typedef std::shared_ptr<SerializableDict> SerializableDictPtr; | ||
typedef std::shared_ptr<TextDict> TextDictPtr; | ||
|
||
// BinaryDict / DartsDict and their pointer aliases exist only when | ||
// ENABLE_DARTS is defined. | ||
#ifdef ENABLE_DARTS | ||
class BinaryDict; | ||
class DartsDict; | ||
typedef std::shared_ptr<BinaryDict> BinaryDictPtr; | ||
typedef std::shared_ptr<DartsDict> DartsDictPtr; | ||
#endif | ||
|
||
} // namespace opencc | ||
|
||
// PACKAGE_DATA_DIRECTORY is declared at global scope, outside namespace | ||
// opencc. It is empty when PKGDATADIR is not defined; otherwise it is | ||
// PKGDATADIR followed by "/". The adjacent-literal concatenation below | ||
// requires PKGDATADIR to expand to a string literal. | ||
#ifndef PKGDATADIR | ||
const std::string PACKAGE_DATA_DIRECTORY = ""; | ||
#else // ifndef PKGDATADIR | ||
const std::string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/"; | ||
#endif // ifndef PKGDATADIR | ||
|
||
// Fallback version string used when VERSION has not already been defined. | ||
#ifndef VERSION | ||
#define VERSION "1.0.*" | ||
#endif // ifndef VERSION | ||
|
||
// The following definitions are provided by CMake | ||
// #define ENABLE_DARTS |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/* | ||
* Open Chinese Convert | ||
* | ||
* Copyright 2010-2014 Carbo Kuo <[email protected]> | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include "Common.hpp" | ||
|
||
namespace opencc { | ||
/** | ||
* Configuration loader | ||
* @ingroup opencc_cpp_api | ||
*/ | ||
class OPENCC_EXPORT Config { | ||
public: | ||
// Constructor and destructor are defined out of line. | ||
Config(); | ||
|
||
virtual ~Config(); | ||
|
||
// Builds a converter from configuration text passed in `json`. | ||
// NOTE(review): `configDirectory` is presumably the base directory for | ||
// resolving files referenced by the configuration -- confirm in the | ||
// implementation. | ||
ConverterPtr NewFromString(const std::string& json, | ||
const std::string& configDirectory); | ||
|
||
// Builds a converter from the configuration file named `fileName`. | ||
ConverterPtr NewFromFile(const std::string& fileName); | ||
|
||
private: | ||
// Opaque pointer to implementation state; its concrete type and ownership | ||
// are not visible in this header. | ||
void* internal; | ||
}; | ||
} // namespace opencc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
* Open Chinese Convert | ||
* | ||
* Copyright 2010-2014 Carbo Kuo <[email protected]> | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include "Common.hpp" | ||
#include "Segmentation.hpp" | ||
|
||
namespace opencc { | ||
/** | ||
* Conversion interface | ||
* @ingroup opencc_cpp_api | ||
*/ | ||
class OPENCC_EXPORT Conversion { | ||
public: | ||
// Stores the dictionary pointer; `dict` is const and never reseated. | ||
// NOTE(review): this single-argument constructor is not `explicit`, so a | ||
// DictPtr converts implicitly to a Conversion. | ||
Conversion(DictPtr _dict) : dict(_dict) {} | ||
|
||
// Convert single phrase (std::string overload) | ||
std::string Convert(const std::string& phrase) const; | ||
|
||
// Convert single phrase (C-string overload) | ||
std::string Convert(const char* phrase) const; | ||
|
||
// Convert segmented text | ||
SegmentsPtr Convert(const SegmentsPtr& input) const; | ||
|
||
// Returns the dictionary pointer by value (a copy of the shared pointer). | ||
const DictPtr GetDict() const { return dict; } | ||
|
||
private: | ||
const DictPtr dict; | ||
}; | ||
} // namespace opencc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* Open Chinese Convert | ||
* | ||
* Copyright 2010-2014 Carbo Kuo <[email protected]> | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <list> | ||
|
||
#include "Common.hpp" | ||
#include "Conversion.hpp" | ||
|
||
namespace opencc { | ||
/** | ||
* Chain of conversions | ||
* Consists of a list of conversions. Converts input in sequence. | ||
* @ingroup opencc_cpp_api | ||
*/ | ||
class OPENCC_EXPORT ConversionChain { | ||
public: | ||
// Takes the list of conversions by value; defined out of line. | ||
ConversionChain(const std::list<ConversionPtr> _conversions); | ||
|
||
// Converts segmented input and returns the resulting segments. | ||
// Defined out of line. | ||
SegmentsPtr Convert(const SegmentsPtr& input) const; | ||
|
||
// Returns a copy of the conversion list (return is by value). | ||
const std::list<ConversionPtr> GetConversions() const { return conversions; } | ||
|
||
private: | ||
const std::list<ConversionPtr> conversions; | ||
}; | ||
} // namespace opencc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/* | ||
* Open Chinese Convert | ||
* | ||
* Copyright 2010-2014 Carbo Kuo <[email protected]> | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include "Common.hpp" | ||
#include "Segmentation.hpp" | ||
|
||
namespace opencc { | ||
/** | ||
* Controller of segmentation and conversion | ||
* @ingroup opencc_cpp_api | ||
*/ | ||
class OPENCC_EXPORT Converter { | ||
public: | ||
// Stores a name, a segmentation and a conversion chain. All three members | ||
// are const, so a Converter cannot be changed after construction. | ||
Converter(const std::string& _name, SegmentationPtr _segmentation, | ||
ConversionChainPtr _conversionChain) | ||
: name(_name), segmentation(_segmentation), | ||
conversionChain(_conversionChain) {} | ||
|
||
// Converts `text` and returns the result as a new string. | ||
std::string Convert(const std::string& text) const; | ||
|
||
// C-string variant. No output capacity parameter is taken. | ||
// NOTE(review): presumably writes the result into `output` and returns | ||
// its length, with the caller sizing the buffer -- confirm in the | ||
// implementation. | ||
size_t Convert(const char* input, char* output) const; | ||
|
||
// Returns the segmentation pointer by value. | ||
const SegmentationPtr GetSegmentation() const { return segmentation; } | ||
|
||
// Returns the conversion chain pointer by value. | ||
const ConversionChainPtr GetConversionChain() const { | ||
return conversionChain; | ||
} | ||
|
||
private: | ||
// `name` has no accessor in this header. | ||
const std::string name; | ||
const SegmentationPtr segmentation; | ||
const ConversionChainPtr conversionChain; | ||
}; | ||
} // namespace opencc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* Open Chinese Convert | ||
* | ||
* Copyright 2010-2014 Carbo Kuo <[email protected]> | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include "Common.hpp" | ||
#include "SerializableDict.hpp" | ||
|
||
namespace opencc { | ||
/** | ||
* Darts dictionary | ||
* @ingroup opencc_cpp_api | ||
*/ | ||
class OPENCC_EXPORT DartsDict : public Dict, public SerializableDict { | ||
public: | ||
virtual ~DartsDict(); | ||
|
||
// Declared here, defined out of line. | ||
// Presumably the longest key length in the dictionary -- TODO confirm. | ||
virtual size_t KeyMaxLength() const; | ||
|
||
// Looks up the `len` bytes starting at `word`. | ||
// Presumably an exact-key match returning an empty Optional on a miss -- | ||
// TODO confirm. | ||
virtual Optional<const DictEntry*> Match(const char* word, size_t len) const; | ||
|
||
// Prefix variant of Match over the same (word, len) input. | ||
// NOTE(review): which prefix is chosen (e.g. the longest) is not visible | ||
// here -- confirm in the implementation. | ||
virtual Optional<const DictEntry*> MatchPrefix(const char* word, | ||
size_t len) const; | ||
|
||
// Returns the lexicon pointer by value. | ||
virtual LexiconPtr GetLexicon() const; | ||
|
||
// Serializes this dictionary to the given C stream (defined out of line). | ||
virtual void SerializeToFile(FILE* fp) const; | ||
|
||
/** | ||
* Constructs a DartsDict from another dictionary. | ||
*/ | ||
static DartsDictPtr NewFromDict(const Dict& thatDict); | ||
|
||
// Constructs a DartsDict from the given C stream. | ||
static DartsDictPtr NewFromFile(FILE* fp); | ||
|
||
private: | ||
// The constructor is private, so code outside the class cannot construct | ||
// a DartsDict directly; the static factories above are the public way to | ||
// obtain one. | ||
DartsDict(); | ||
|
||
size_t maxLength; | ||
LexiconPtr lexicon; | ||
|
||
// Implementation type, declared but not defined in this header. | ||
// NOTE(review): `internal` is a raw pointer and ownership is not visible | ||
// here -- confirm that the destructor releases it. | ||
class DartsInternal; | ||
DartsInternal* internal; | ||
}; | ||
} // namespace opencc |
Oops, something went wrong.