diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock
index c565b581b9..1cbf87b84f 100644
--- a/docs/Gemfile.lock
+++ b/docs/Gemfile.lock
@@ -7,23 +7,22 @@ PATH
GEM
remote: https://rubygems.org/
specs:
- activesupport (6.0.6)
+ activesupport (7.0.7.2)
concurrent-ruby (~> 1.0, >= 1.0.2)
- i18n (>= 0.7, < 2)
- minitest (~> 5.1)
- tzinfo (~> 1.1)
- zeitwerk (~> 2.2, >= 2.2.2)
- addressable (2.8.1)
+ i18n (>= 1.6, < 2)
+ minitest (>= 5.1)
+ tzinfo (~> 2.0)
+ addressable (2.8.5)
public_suffix (>= 2.0.2, < 6.0)
coffee-script (2.4.1)
coffee-script-source
execjs
coffee-script-source (1.11.1)
colorator (1.1.0)
- commonmarker (0.23.6)
- concurrent-ruby (1.1.10)
- dnsruby (1.61.9)
- simpleidn (~> 0.1)
+ commonmarker (0.23.10)
+ concurrent-ruby (1.2.2)
+ dnsruby (1.70.0)
+ simpleidn (~> 0.2.1)
elasticsearch (7.17.7)
elasticsearch-api (= 7.17.7)
elasticsearch-transport (= 7.17.7)
@@ -35,12 +34,12 @@ GEM
em-websocket (0.5.3)
eventmachine (>= 0.12.9)
http_parser.rb (~> 0)
- ethon (0.15.0)
+ ethon (0.16.0)
ffi (>= 1.15.0)
eventmachine (1.2.7)
eventmachine (1.2.7-x64-mingw32)
execjs (2.8.1)
- faraday (1.10.2)
+ faraday (1.10.3)
faraday-em_http (~> 1.0)
faraday-em_synchrony (~> 1.0)
faraday-excon (~> 1.1)
@@ -64,16 +63,14 @@ GEM
faraday-rack (1.0.0)
faraday-retry (1.0.3)
ffi (1.15.5)
- ffi (1.15.5-x64-mingw-ucrt)
- ffi (1.15.5-x64-mingw32)
forwardable-extended (2.6.0)
gemoji (3.0.1)
- github-pages (227)
+ github-pages (228)
github-pages-health-check (= 1.17.9)
- jekyll (= 3.9.2)
+ jekyll (= 3.9.3)
jekyll-avatar (= 0.7.0)
jekyll-coffeescript (= 1.1.1)
- jekyll-commonmark-ghpages (= 0.2.0)
+ jekyll-commonmark-ghpages (= 0.4.0)
jekyll-default-layout (= 0.1.4)
jekyll-feed (= 0.15.1)
jekyll-gist (= 1.5.0)
@@ -107,7 +104,7 @@ GEM
jemoji (= 0.12.0)
kramdown (= 2.3.2)
kramdown-parser-gfm (= 1.1.0)
- liquid (= 4.0.3)
+ liquid (= 4.0.4)
mercenary (~> 0.3)
minima (= 2.5.1)
nokogiri (>= 1.13.6, < 2.0)
@@ -123,13 +120,13 @@ GEM
activesupport (>= 2)
nokogiri (>= 1.4)
http_parser.rb (0.8.0)
- i18n (0.9.5)
+ i18n (1.14.1)
concurrent-ruby (~> 1.0)
- jekyll (3.9.2)
+ jekyll (3.9.3)
addressable (~> 2.4)
colorator (~> 1.0)
em-websocket (~> 0.5)
- i18n (~> 0.7)
+ i18n (>= 0.7, < 2)
jekyll-sass-converter (~> 1.0)
jekyll-watch (~> 2.0)
kramdown (>= 1.17, < 3)
@@ -145,11 +142,11 @@ GEM
coffee-script-source (~> 1.11.1)
jekyll-commonmark (1.4.0)
commonmarker (~> 0.22)
- jekyll-commonmark-ghpages (0.2.0)
- commonmarker (~> 0.23.4)
+ jekyll-commonmark-ghpages (0.4.0)
+ commonmarker (~> 0.23.7)
jekyll (~> 3.9.0)
jekyll-commonmark (~> 1.4.0)
- rouge (>= 2.0, < 4.0)
+ rouge (>= 2.0, < 5.0)
jekyll-default-layout (0.1.4)
jekyll (~> 3.0)
jekyll-feed (0.15.1)
@@ -237,21 +234,21 @@ GEM
rexml
kramdown-parser-gfm (1.1.0)
kramdown (~> 2.0)
- liquid (4.0.3)
- listen (3.7.1)
+ liquid (4.0.4)
+ listen (3.8.0)
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
mercenary (0.3.6)
- mini_portile2 (2.8.0)
+ mini_portile2 (2.8.4)
minima (2.5.1)
jekyll (>= 3.5, < 5.0)
jekyll-feed (~> 0.9)
jekyll-seo-tag (~> 2.1)
- minitest (5.16.3)
+ minitest (5.19.0)
multi_json (1.15.0)
- multipart-post (2.2.3)
- nokogiri (1.13.9)
- mini_portile2 (~> 2.8.0)
+ multipart-post (2.3.0)
+ nokogiri (1.15.4)
+ mini_portile2 (~> 2.8.2)
racc (~> 1.4)
octokit (4.25.1)
faraday (>= 1, < 3)
@@ -259,11 +256,11 @@ GEM
pathutil (0.16.2)
forwardable-extended (~> 2.6)
public_suffix (4.0.7)
- racc (1.6.0)
+ racc (1.7.1)
rb-fsevent (0.11.2)
rb-inotify (0.10.1)
ffi (~> 1.0)
- rexml (3.2.5)
+ rexml (3.2.6)
rouge (3.26.0)
ruby2_keywords (0.0.5)
rubyzip (2.3.2)
@@ -280,20 +277,19 @@ GEM
unf (~> 0.1.4)
terminal-table (1.8.0)
unicode-display_width (~> 1.1, >= 1.1.1)
- thread_safe (0.3.6)
typhoeus (1.4.0)
ethon (>= 0.9.0)
- tzinfo (1.2.10)
- thread_safe (~> 0.1)
+ tzinfo (2.0.6)
+ concurrent-ruby (~> 1.0)
unf (0.1.4)
unf_ext
unf_ext (0.0.8.2)
unicode-display_width (1.8.0)
wdm (0.1.1)
- webrick (1.7.0)
- zeitwerk (2.6.1)
+ webrick (1.8.1)
PLATFORMS
+ arm64-darwin-22
x64-mingw-ucrt
x64-mingw32
x86_64-darwin-21
@@ -302,7 +298,7 @@ PLATFORMS
DEPENDENCIES
elasticsearch (~> 7.10)
- github-pages (= 227)
+ github-pages (= 228)
jekyll (~> 3.9)
jekyll-incremental (= 0.1.0)!
jekyll-redirect-from
diff --git a/docs/en/ocr_pipeline_components.md b/docs/en/ocr_pipeline_components.md
index 913c754df5..6ee254374c 100644
--- a/docs/en/ocr_pipeline_components.md
+++ b/docs/en/ocr_pipeline_components.md
@@ -1356,6 +1356,89 @@ data.select("dicom").show()
+
+##### Input Columns
+
+{:.table-model-big}
+| Param name | Type | Default | Column Data Description |
+| --- | --- | --- | --- |
+| inputCol | string | content | Binary DICOM object |
+| inputRegionsCol | string | regions | Array[Coordinates] of regions detected by PositionFinder |
+
+
+
+#### Parameters
+
+{:.table-model-big}
+| Param name | Type | Default | Description |
+| --- | --- | --- | --- |
+| scaleFactor | float | 1.0 | Scaling factor for regions. |
+| rotated | boolean | False | Enable/Disable support for rotated rectangles |
+| keepInput | boolean | False | Keep the original input column |
+| compression | string | RLELossless | Compression type |
+| forceCompress | boolean | False | If true, always compress the image; if false, compress only when the original image was compressed |
+| aggCols | Array[string] | ['path'] | Columns to include in aggregation; they are preserved in the output DataFrame after transformations |
+
+
+
## Image pre-processing
This section describes the transformers for image pre-processing: scaling, binarization, skew correction, etc.
@@ -2896,6 +2979,11 @@ val result = modelPipeline.transform(df)
| lineWidth | Int | 4 | Line width for drawing rectangles |
| fontSize | Int | 12 | Font size for rendering labels and scores |
| rotated | boolean | False | Support rotated regions |
+| rectColor | Color | Color.black | Outline color for bounding boxes |
+| filledRect | boolean | False | Enable/Disable filling rectangles |
+| sourceImageHeightCol | Int | height_dimension | Column with the original image height that the annotations reference |
+| sourceImageWidthCol | Int | width_dimension | Column with the original image width that the annotations reference |
+| scaleBoundingBoxes | Boolean | True | Scale bounding boxes using the source image height and width, keeping regions accurate after image transformations |
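+
+For illustration, a brief sketch combining the new parameters (only `setRectColor` appears in the examples below; the other setter names are assumed from the parameter names above):
+
+```python
+from sparkocr.transformers import *
+from sparkocr.enums import *
+
+# Hypothetical use of the new drawing options.
+draw = ImageDrawRegions() \
+    .setInputCol("image") \
+    .setRegionCol("regions") \
+    .setRectColor(Color.red) \
+    .setFilledRect(False) \
+    .setScaleBoundingBoxes(True) \
+    .setOutputCol("image_with_regions")
+```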
@@ -2915,13 +3003,12 @@ val result = modelPipeline.transform(df)
```python
from pyspark.ml import PipelineModel
from sparkocr.transformers import *
+from sparkocr.enums import *
imagePath = "path to image"
# Read image file as binary file
-df = spark.read
- .format("binaryFile")
- .load(imagePath)
+df = spark.read.format("binaryFile").load(imagePath)
binary_to_image = BinaryToImage() \
.setInputCol("content") \
@@ -2935,6 +3022,7 @@ layout_analyzer = ImageLayoutAnalyzer() \
draw = ImageDrawRegions() \
.setInputCol("image") \
.setRegionCol("regions") \
+ .setRectColor(Color.red) \
.setOutputCol("image_with_regions")
# Define pipeline
@@ -2950,17 +3038,16 @@ data.show()
```scala
import org.apache.spark.ml.Pipeline
+import java.awt.Color
import com.johnsnowlabs.ocr.transformers.{ImageSplitRegions, ImageLayoutAnalyzer}
import com.johnsnowlabs.ocr.OcrContext.implicits._
+
val imagePath = "path to image"
// Read image file as binary file
-val df = spark.read
- .format("binaryFile")
- .load(imagePath)
- .asImage("image")
+val df = spark.read.format("binaryFile").load(imagePath).asImage("image")
// Define transformer for detect regions
val layoutAnalyzer = new ImageLayoutAnalyzer()
@@ -2970,6 +3057,7 @@ val layoutAnalyzer = new ImageLayoutAnalyzer()
val draw = new ImageDrawRegions()
.setInputCol("image")
.setRegionCol("regions")
+ .setRectColor(Color.RED)
.setOutputCol("image_with_regions")
// Define pipeline
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.AssertionTFGraphBuilder.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.AssertionTFGraphBuilder.html
deleted file mode 100644
index 0b352b8a4a..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.AssertionTFGraphBuilder.html
+++ /dev/null
@@ -1,650 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders.graph_builders.AssertionTFGraphBuilder — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.GenericClassifierTFGraphBuilder.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.GenericClassifierTFGraphBuilder.html
deleted file mode 100644
index b3d99d3487..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.GenericClassifierTFGraphBuilder.html
+++ /dev/null
@@ -1,650 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders.graph_builders.GenericClassifierTFGraphBuilder — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.NerTFGraphBuilder.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.NerTFGraphBuilder.html
deleted file mode 100644
index 047ae3fc1a..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.NerTFGraphBuilder.html
+++ /dev/null
@@ -1,636 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders.graph_builders.NerTFGraphBuilder — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.RelationExtractionTFGraphBuilder.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.RelationExtractionTFGraphBuilder.html
deleted file mode 100644
index 3525e029d6..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.RelationExtractionTFGraphBuilder.html
+++ /dev/null
@@ -1,645 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders.graph_builders.RelationExtractionTFGraphBuilder — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.TFGraphBuilder.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.TFGraphBuilder.html
deleted file mode 100644
index 5b81109db4..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.TFGraphBuilder.html
+++ /dev/null
@@ -1,626 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders.graph_builders.TFGraphBuilder — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.TFGraphBuilderFactory.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.TFGraphBuilderFactory.html
deleted file mode 100644
index 9d54984723..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.TFGraphBuilderFactory.html
+++ /dev/null
@@ -1,660 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders.graph_builders.TFGraphBuilderFactory — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.TensorflowAddonsNeeded.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.TensorflowAddonsNeeded.html
deleted file mode 100644
index 19214050f0..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.TensorflowAddonsNeeded.html
+++ /dev/null
@@ -1,583 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders.graph_builders.TensorflowAddonsNeeded — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.WrongTFVersion.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.WrongTFVersion.html
deleted file mode 100644
index 187dcd2d69..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.WrongTFVersion.html
+++ /dev/null
@@ -1,581 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders.graph_builders.WrongTFVersion — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.html
deleted file mode 100644
index 2af68ed060..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders.graph_builders.html
+++ /dev/null
@@ -1,620 +0,0 @@
- [auto-generated Sphinx module overview "sparknlp_jsl._tf_graph_builders.graph_builders — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.AssertionTFGraphBuilder.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.AssertionTFGraphBuilder.html
deleted file mode 100644
index a645f6dc77..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.AssertionTFGraphBuilder.html
+++ /dev/null
@@ -1,259 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders_1x.graph_builders.AssertionTFGraphBuilder — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.GenericClassifierTFGraphBuilder.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.GenericClassifierTFGraphBuilder.html
deleted file mode 100644
index d0cf258ff6..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.GenericClassifierTFGraphBuilder.html
+++ /dev/null
@@ -1,257 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders_1x.graph_builders.GenericClassifierTFGraphBuilder — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.NerTFGraphBuilder.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.NerTFGraphBuilder.html
deleted file mode 100644
index 0bab574412..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.NerTFGraphBuilder.html
+++ /dev/null
@@ -1,245 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders_1x.graph_builders.NerTFGraphBuilder — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.RelationExtractionTFGraphBuilder.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.RelationExtractionTFGraphBuilder.html
deleted file mode 100644
index 4d5cc11ec9..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.RelationExtractionTFGraphBuilder.html
+++ /dev/null
@@ -1,254 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders_1x.graph_builders.RelationExtractionTFGraphBuilder — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.TFGraphBuilder.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.TFGraphBuilder.html
deleted file mode 100644
index 1771232948..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.TFGraphBuilder.html
+++ /dev/null
@@ -1,235 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders_1x.graph_builders.TFGraphBuilder — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.TFGraphBuilderFactory.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.TFGraphBuilderFactory.html
deleted file mode 100644
index 5c6d0b1407..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.TFGraphBuilderFactory.html
+++ /dev/null
@@ -1,270 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders_1x.graph_builders.TFGraphBuilderFactory — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.WrongTFVersion.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.WrongTFVersion.html
deleted file mode 100644
index df162a30e3..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.WrongTFVersion.html
+++ /dev/null
@@ -1,192 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl._tf_graph_builders_1x.graph_builders.WrongTFVersion — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.html
deleted file mode 100644
index 4cdd75d0bd..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl._tf_graph_builders_1x.graph_builders.html
+++ /dev/null
@@ -1,226 +0,0 @@
- [auto-generated Sphinx module overview "sparknlp_jsl._tf_graph_builders_1x.graph_builders — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AnnotationMerger.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AnnotationMerger.html
deleted file mode 100644
index a637b3df7e..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AnnotationMerger.html
+++ /dev/null
@@ -1,1044 +0,0 @@
- [auto-generated Sphinx page "sparknlp_jsl.annotator.AnnotationMerger — Spark NLP 3.3.0 documentation"; stripped HTML body omitted]
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionDLApproach.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionDLApproach.html
deleted file mode 100644
index f613cd1449..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionDLApproach.html
+++ /dev/null
@@ -1,1467 +0,0 @@
-
-
-
-
-
-
-
-
sparknlp_jsl.annotator.AssertionDLApproach — Spark NLP 3.3.0 documentation
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
sparknlp_jsl.annotator.AssertionDLApproach
-
-
-class sparknlp_jsl.annotator. AssertionDLApproach [source]
-Bases: sparknlp.common.AnnotatorApproach
-Train a Assertion Model algorithm using deep learning.
-from extracted entities and text. AssertionLogRegModel requires DOCUMENT, CHUNK and WORD_EMBEDDINGS type
-annotator inputs, which can be obtained by e.g a
-The training data should have annotations columns of type DOCUMENT
, CHUNK
, WORD_EMBEDDINGS
, the label
column
-(The assertion status that you want to predict), the start
(the start index for the term that has the assertion status),
-the end
column (the end index for the term that has the assertion status).This model use a deep learning to predict the entity.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, CHUNK, WORD_EMBEDDINGS
-ASSERTION
-
-
-
-
-Parameters
-
-label Column with one label per document. Example of possible values: “present”, “absent”, “hypothetical”, “conditional”, “associated_with_other_person”, etc.
-
-startCol Column that contains the token number for the start of the target
-
-endCol olumn that contains the token number for the end of the target
-
-batchSize Size for each batch in the optimization process
-
-epochs Number of epochs for the optimization process
-
-learningRate Learning rate for the optimization process
-
-dropout dropout”, “Dropout at the output of each layer
-
-maxSentLen Max length for an input sentence.
-
-graphFolder Folder path that contain external graph files
-
-configProtoBytes ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()
-
-validationSplit Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off.
-
-evaluationLogExtended Select if you want to have mode eval.
-
-testDataset Path to test dataset. If set used to calculate statistic on it during training.
-
-includeConfidence whether to include confidence scores in annotation metadata
-
-enableOutputLogs whether or not to output logs
-
-outputLogsPath Folder path to save training logs
-
-verbose Level of verbosity during training
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.annotator import *
->>> from sparknlp_jsl.annotator import *
->>> from sparknlp.training import *
->>> from pyspark.ml import Pipeline
->>> document_assembler = DocumentAssembler () \
-... . setInputCol ( "text" ) \
-... . setOutputCol ( "document" )
->>> sentence_detector = SentenceDetector () \
-... . setInputCol ( "document" ) \
-... . setOutputCol ( "sentence" )
->>> tokenizer = Tokenizer () \
-... . setInputCols ([ "sentence" ]) \
-... . setOutputCol ( "token" )
->>> embeddings = WordEmbeddingsModel . pretrained ( "embeddings_clinical" , "en" , "clinical/models" ) \
-... . setInputCols ([ "sentence" , "token" ]) \
-... . setOutputCol ( "word_embeddings" ) \
-... . setCaseSensitive ( False )
->>> chunk = Chunker () \
-... . setInputCols ([ sentence ]) \
-... . setChunkCol ( "chunk" ) \
-... . setOutputCol ( "chunk" )
->>> assertion = AssertionDLApproach () \
-... . setLabelCol ( "label" ) \
-... . setInputCols ([ "document" , "chunk" , "word_embeddings" ]) \
-... . setOutputCol ( "assertion" ) \
-... . setOutputCol ( "assertion" ) \
-... . setBatchSize ( 128 ) \
-... . setDropout ( 0.012 ) \
-... . setLearningRate ( 0.015 ) \
-... . setEpochs ( 1 ) \
-... . setStartCol ( "start" ) \
-... . setEndCol ( "end" ) \
-... . setMaxSentLen ( 250 )
->>> assertionPipeline = Pipeline ( stages = [
-... document_assembler ,
-... sentence_detector ,
-... tokenizer ,
-... embeddings ,
-... chunk ,
-... assertion ])
->>> assertionModel = assertionPipeline . fit ( dataset )
-
-
-Methods
-
-__init__ ()
-
-clear (param)
-Clears a param from the param map if it has been explicitly set.
-
-copy ([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam (param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams ()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap ([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-fit (dataset[, params])
-Fits a model to the input dataset with optional parameters.
-
-fitMultiple (dataset, paramMaps)
-Fits a model to the input dataset for each param map in paramMaps .
-
-getInputCols ()
-Gets current column names of input annotations.
-
-getLazyAnnotator ()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault (param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol ()
-Gets output column name of annotations.
-
-getParam (paramName)
-Gets a param by its name.
-
-getParamValue (paramName)
-Gets the value of a parameter.
-
-hasDefault (param)
-Checks whether a param has a default value.
-
-hasParam (paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined (param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet (param)
-Checks whether a param is explicitly set by user.
-
-load (path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read ()
-Returns an MLReader instance for this class.
-
-save (path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set (param, value)
-Sets a parameter in the embedded param map.
-
-setBatchSize (size)
-Sets the batch size for the optimization process.
-
-setConfigProtoBytes (b)
-Sets ConfigProto from tensorflow, serialized into byte array.
-
-setDropout (rate)
-Sets the dropout at the output of each layer.
-
-setEnableOutputLogs (value)
-Sets whether to output to the annotators log folder.
-
-setEndCol (e)
-Sets the column that contains the token number for the end of the target.
-
-setEpochs (number)
-Sets the number of epochs for the optimization process.
-
-setEvaluationLogExtended (v)
-Sets whether the evaluation log should be extended.
-
-setGraphFolder (p)
-Sets the folder path that contains external graph files.
-
-setIncludeConfidence (value)
-Sets whether to include confidence scores in annotation metadata.
-
-setInputCols (*value)
-Sets column names of input annotations.
-
-setLabelCol (label)
-Sets the column with one label per document.
-
-setLazyAnnotator (value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setLearningRate (lamda)
-Sets the learning rate for the optimization process.
-
-setMaxSentLen (length)
-Sets the max length for an input sentence.
-
-setOutputCol (value)
-Sets output column name of annotations.
-
-setOutputLogsPath (value)
-Sets the folder path to save training logs.
-
-setParamValue (paramName)
-Sets the value of a parameter.
-
-setStartCol (s)
-Sets the column that contains the token number for the start of the target.
-
-setTestDataset (path[, read_as, options])
-Sets the path to the test dataset.
-
-setValidationSplit (v)
-Sets the proportion of the training dataset to be validated against the model on each epoch.
-
-setVerbose (value)
-Sets the level of verbosity during training.
-
-write ()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-batchSize
-
-
-configProtoBytes
-
-
-dropout
-
-
-enableOutputLogs
-
-
-endCol
-
-
-epochs
-
-
-evaluationLogExtended
-
-
-getter_attrs
-
-
-graphFolder
-
-
-includeConfidence
-
-
-inputCols
-
-
-label
-
-
-lazyAnnotator
-
-
-learningRate
-
-
-maxSentLen
-
-
-outputCol
-
-
-outputLogsPath
-
-
-params
-Returns all params ordered by name.
-
-startCol
-
-
-testDataset
-
-
-validationSplit
-
-
-verbose
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
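-
-A minimal sketch of the merge ordering (default < user-supplied < extra), assuming the assertion annotator from the example above:
-
->>> assertion . setBatchSize ( 128 )
->>> param_map = assertion . extractParamMap ( extra = { assertion . batchSize : 64 })
->>> # the extra value (64) wins over the user-supplied value (128)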
-
-
-
-
-
-
-
-
-fit ( dataset , params = None )
-Fits a model to the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame input dataset.
-
-params dict or list or tuple, optional an optional param map that overrides embedded params. If a list/tuple of
-param maps is given, this calls fit on each param map and returns a list of
-models.
-
-Returns
-
-Transformer or a list of Transformer fitted model(s)
-
-
-
-
-
-
-
-
-fitMultiple ( dataset , paramMaps )
-Fits a model to the input dataset for each param map in paramMaps .
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame input dataset.
-
-paramMaps collections.abc.Sequence A Sequence of param maps.
-
-Returns
-
-_FitMultipleIterator A thread safe iterable which contains one model for each param map. Each
-call to next(modelIterator) will return (index, model) where model was fit
-using paramMaps[index] . index values may not be sequential.
-
-
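-A hedged sketch of consuming the returned iterator, assuming two param maps over the assertion stage's dropout param (values are illustrative):
-
->>> maps = [{ assertion . dropout : 0.01 }, { assertion . dropout : 0.05 }]
->>> for index , model in assertionPipeline . fitMultiple ( dataset , maps ):
-...     print ( index )
-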
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param .
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
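-For example, a configured annotator can be persisted and restored via the save and load methods above (a sketch; the path is hypothetical):
-
->>> assertion . save ( "/tmp/assertion_dl_approach" )
->>> restored = AssertionDLApproach . load ( "/tmp/assertion_dl_approach" )
-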
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBatchSize ( size ) [source]
-Sets the batch size for the optimization process.
-
-Parameters
-
-size int Size for each batch in the optimization process
-
-
-
-
-
-
-
-
-setConfigProtoBytes ( b ) [source]
-Sets ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString() .
-
-Parameters
-
-b bytes ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()
-
-
-
-
-
-
-
-
-setDropout ( rate ) [source]
-Set a dropout at the output of each layer
-
-Parameters
-
-rate float Dropout at the output of each layer
-
-
-
-
-
-
-
-
-setEnableOutputLogs ( value ) [source]
-Sets whether to output to the annotators log folder.
-
-Parameters
-
-value bool Whether to output to the annotators log folder
-
-
-
-
-
-
-
-
-setEndCol ( e ) [source]
-Set column that contains the token number for the end of the target.
-
-Parameters
-
-e str Column that contains the token number for the end of the target
-
-
-
-
-
-
-
-
-setEpochs ( number ) [source]
-Sets number of epochs for the optimization process
-
-Parameters
-
-number int Number of epochs for the optimization process
-
-
-
-
-
-
-
-
-setEvaluationLogExtended ( v ) [source]
-Sets whether the evaluation log should be extended.
-
-Parameters
-
-v bool Evaluation log extended.
-
-
-
-
-
-
-
-
-setGraphFolder ( p ) [source]
-Sets the folder path that contains external graph files.
-
-Parameters
-
-p str Folder path that contains external graph files.
-
-
-
-
-
-
-
-
-setIncludeConfidence ( value ) [source]
-Sets whether to include confidence scores in annotation metadata.
-
-Parameters
-
-value bool Whether to include confidence scores in annotation metadata
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLabelCol ( label ) [source]
-Set a column with one label per document. Example of possible values: “present”, “absent”, “hypothetical”, “conditional”, “associated_with_other_person”, etc.
-
-Parameters
-
-label str label. Column with one label per document. Example of possible values: “present”, “absent”, “hypothetical”, “conditional”, “associated_with_other_person”, etc.
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setLearningRate ( lamda ) [source]
-Sets the learning rate for the optimization process.
-
-Parameters
-
-lamda float Learning rate for the optimization process.
-
-
-
-
-
-
-
-
-setMaxSentLen ( length ) [source]
-Set the max length for an input sentence.
-
-Parameters
-
-length int Max length for an input sentence.
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setOutputLogsPath ( value ) [source]
-Sets the folder path to save training logs.
-
-Parameters
-
-value str Folder path to save training logs
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setStartCol ( s ) [source]
-Set a column that contains the token number for the start of the target
-
-Parameters
-
-s str Column that contains the token number for the start of the target
-
-
-
-
-
-
-
-
-setTestDataset ( path , read_as = 'SPARK' , options = {'format': 'parquet'} ) [source]
-Sets the path to the test dataset. If set, it is used to calculate statistics on it during training.
-
-Parameters
-
-path str Path to the test dataset. If set, it is used to calculate statistics on it during training.
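-
-For example, pointing the approach at a parquet test set (the path is hypothetical):
-
->>> assertion . setTestDataset ( "test_data.parquet" , read_as = "SPARK" , options = { "format" : "parquet" })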
-
-
-
-
-
-
-
-
-setValidationSplit ( v ) [source]
-
-Sets the proportion of the training dataset to be validated against the model on each epoch. The value should be between 0.0 and 1.0; by default it is 0.0, i.e. off.
-
-Parameters
-
-v float Proportion of the training dataset to be validated against the model on each epoch.
-The value should be between 0.0 and 1.0; by default it is 0.0, i.e. off.
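-
-For example, to hold out 10% of the training data for validation on each epoch:
-
->>> assertion . setValidationSplit ( 0.1 )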
-
-
-
-
-
-
-
-
-setVerbose ( value ) [source]
-Sets level of verbosity during training.
-
-Parameters
-
-value int Level of verbosity during training.
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionDLModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionDLModel.html
deleted file mode 100644
index 1f61385ed3..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionDLModel.html
+++ /dev/null
@@ -1,1118 +0,0 @@
-
-
-
-
-
-
-
-
sparknlp_jsl.annotator.AssertionDLModel — Spark NLP 3.3.0 documentation
-
-
sparknlp_jsl.annotator.AssertionDLModel
-
-
-class sparknlp_jsl.annotator. AssertionDLModel ( classname = 'com.johnsnowlabs.nlp.annotators.assertion.dl.AssertionDLModel' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel , sparknlp.common.HasStorageRef
-AssertionDL is a deep learning based approach used to extract Assertion Status
-from extracted entities and text. AssertionDLModel requires DOCUMENT, CHUNK and WORD_EMBEDDINGS type
-annotator inputs, which can be obtained by e.g. a DocumentAssembler, an NerConverter and a WordEmbeddingsModel.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, CHUNK, WORD_EMBEDDINGS
-ASSERTION
-
-
-
-
-Parameters
-
-maxSentLen Max length for an input sentence.
-
-targetNerLabels List of NER labels to mark as target for assertion, must match NER output.
-
-configProtoBytes ConfigProto from tensorflow, serialized into byte array.
-
-classes Tags used to train this AssertionDLModel
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> data = spark . createDataFrame ([[ "Patient with severe fever and sore throat" ],[ "Patient shows no stomach pain" ],[ "She was maintained on an epidural and PCA for pain control." ]]) . toDF ( "text" )
->>> documentAssembler = DocumentAssembler () . setInputCol ( "text" ) . setOutputCol ( "document" )
->>> sentenceDetector = SentenceDetector () . setInputCols ([ "document" ]) . setOutputCol ( "sentence" )
->>> tokenizer = Tokenizer () . setInputCols ([ "sentence" ]) . setOutputCol ( "token" )
->>> embeddings = WordEmbeddingsModel . pretrained ( "embeddings_clinical" , "en" , "clinical/models" ) \
-... . setInputCols ([ "sentence" , "token" ]) \
-... . setOutputCol ( "embeddings" )
->>> nerModel = MedicalNerModel . pretrained ( "ner_clinical" , "en" , "clinical/models" ) \
-... . setInputCols ([ "sentence" , "token" , "embeddings" ]) . setOutputCol ( "ner" )
->>> nerConverter = NerConverter () . setInputCols ([ "sentence" , "token" , "ner" ]) . setOutputCol ( "ner_chunk" )
->>> clinicalAssertion = AssertionDLModel . pretrained ( "assertion_dl" , "en" , "clinical/models" ) \
-... . setInputCols ([ "sentence" , "ner_chunk" , "embeddings" ]) \
-... . setOutputCol ( "assertion" )
->>> assertionPipeline = Pipeline ( stages = [
-... documentAssembler ,
-... sentenceDetector ,
-... tokenizer ,
-... embeddings ,
-... nerModel ,
-... nerConverter ,
-... clinicalAssertion
-... ])
-
-
->>> assertionModel = assertionPipeline . fit ( data )
-
-
->>> result = assertionModel . transform ( data )
->>> result . selectExpr ( "ner_chunk.result as ner" , "assertion.result" ) . show ( 3 , truncate = False )
-+--------------------------------+--------------------------------+
-|ner |result |
-+--------------------------------+--------------------------------+
-|[severe fever, sore throat] |[present, present] |
-|[stomach pain] |[absent] |
-|[an epidural, PCA, pain control]|[present, present, hypothetical]|
-+--------------------------------+--------------------------------+
-
-
-Methods
-
-__init__ ([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear (param)
-Clears a param from the param map if it has been explicitly set.
-
-copy ([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam (param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams ()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap ([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols ()
-Gets current column names of input annotations.
-
-getLazyAnnotator ()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault (param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol ()
-Gets output column name of annotations.
-
-getParam (paramName)
-Gets a param by its name.
-
-getParamValue (paramName)
-Gets the value of a parameter.
-
-getStorageRef ()
-Gets unique reference name for identification.
-
-hasDefault (param)
-Checks whether a param has a default value.
-
-hasParam (paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined (param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet (param)
-Checks whether a param is explicitly set by user.
-
-load (path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-pretrained (name[, lang, remote_loc])
-
-read ()
-Returns an MLReader instance for this class.
-
-save (path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set (param, value)
-Sets a parameter in the embedded param map.
-
-setConfigProtoBytes (b)
-
-setInputCols (*value)
-Sets column names of input annotations.
-
-setLazyAnnotator (value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol (value)
-Sets output column name of annotations.
-
-setParamValue (paramName)
-Sets the value of a parameter.
-
-setParams ()
-
-setStorageRef (value)
-Sets unique reference name for identification.
-
-transform (dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write ()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-classes
-
-
-configProtoBytes
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-maxSentLen
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-storageRef
-
-
-targetNerLabels
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-getStorageRef ( )
-Gets unique reference name for identification.
-
-Returns
-
-str Unique reference name for identification
-
-
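-For example, the storage reference of the embeddings the pretrained model expects can be inspected (a sketch using the clinicalAssertion stage from the example above; the embeddings used at inference time should carry the same reference):
-
->>> clinicalAssertion . getStorageRef ()
-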
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param .
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setStorageRef ( value )
-Sets unique reference name for identification.
-
-Parameters
-
-value str Unique reference name for identification
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-Returns
-
-pyspark.sql.DataFrame transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionFilterer.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionFilterer.html
deleted file mode 100644
index 278b883ff1..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionFilterer.html
+++ /dev/null
@@ -1,1138 +0,0 @@
-
-
-
-
-
-
-
-
sparknlp_jsl.annotator.AssertionFilterer — Spark NLP 3.3.0 documentation
-
-
-
sparknlp_jsl.annotator.AssertionFilterer
-
-
-class sparknlp_jsl.annotator. AssertionFilterer ( classname = 'com.johnsnowlabs.nlp.annotators.chunker.AssertionFilterer' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
-Filters entities coming from ASSERTION type annotations and returns the CHUNKS.
-Filters can be set via a white list on the extracted chunk, the assertion or a regular expression.
-The white list for the assertion is enabled by default. To use a chunk white list, criteria has to be set to isin .
-For regex, criteria has to be set to regex .
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, CHUNK, ASSERTION
-CHUNK
-
-
-
-
-Parameters
-
-whiteList If defined, list of entities to process. The rest will be ignored
-
-regex If defined, list of regex patterns to process. The rest will be ignored.
-
-criteria Tag representing the criteria used to filter the chunks. Possible values: (assertion|isIn|regex).
-assertion : Filter by the assertion
-isIn : Filter by the chunk
-regex : Filter using a regex
-
-entitiesConfidence Entity pairs to remove based on the confidence level
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
- To see how the assertions are extracted, see the example for AssertionDLModel.
- Define an extra step where the assertions are filtered
->>> assertionFilterer = AssertionFilterer () \
-... . setInputCols ([ "sentence" , "ner_chunk" , "assertion" ]) \
-... . setOutputCol ( "filtered" ) \
-... . setCriteria ( "assertion" ) \
-... . setWhiteList ([ "present" ])
-...
->>> assertionPipeline = Pipeline ( stages = [
-... documentAssembler ,
-... sentenceDetector ,
-... tokenizer ,
-... embeddings ,
-... nerModel ,
-... nerConverter ,
-... clinicalAssertion ,
-... assertionFilterer
-... ])
-...
->>> assertionModel = assertionPipeline . fit ( data )
->>> result = assertionModel . transform ( data )
-
-
->>> result . selectExpr ( "ner_chunk.result" , "assertion.result" ) . show ( 3 , truncate = False )
-+--------------------------------+--------------------------------+
-|result |result |
-+--------------------------------+--------------------------------+
-|[severe fever, sore throat] |[present, present] |
-|[stomach pain] |[absent] |
-|[an epidural, PCA, pain control]|[present, present, hypothetical]|
-+--------------------------------+--------------------------------+
-
-
->>> result . select ( "filtered.result" ) . show ( 3 , truncate = False )
-+---------------------------+
-|result |
-+---------------------------+
-|[severe fever, sore throat]|
-|[] |
-|[an epidural, PCA] |
-+---------------------------+
-
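-To filter by the chunk text instead, a hedged sketch of the chunk white list mode (the criteria value follows the class description above; the white-listed chunk is illustrative):
-
->>> chunkFilterer = AssertionFilterer () \
-... . setInputCols ([ "sentence" , "ner_chunk" , "assertion" ]) \
-... . setOutputCol ( "filtered" ) \
-... . setCriteria ( "isin" ) \
-... . setWhiteList ([ "severe fever" ])
-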
-
-Methods
-
-__init__ ([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear (param)
-Clears a param from the param map if it has been explicitly set.
-
-copy ([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam (param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams ()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap ([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols ()
-Gets current column names of input annotations.
-
-getLazyAnnotator ()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault (param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol ()
-Gets output column name of annotations.
-
-getParam (paramName)
-Gets a param by its name.
-
-getParamValue (paramName)
-Gets the value of a parameter.
-
-hasDefault (param)
-Checks whether a param has a default value.
-
-hasParam (paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined (param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet (param)
-Checks whether a param is explicitly set by user.
-
-load (path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read ()
-Returns an MLReader instance for this class.
-
-save (path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set (param, value)
-Sets a parameter in the embedded param map.
-
-setCriteria (s)
-Sets the tag representing the criteria used to filter the chunks.
-
-setInputCols (*value)
-Sets column names of input annotations.
-
-setLazyAnnotator (value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol (value)
-Sets output column name of annotations.
-
-setParamValue (paramName)
-Sets the value of a parameter.
-
-setParams ()
-
-setRegex (value)
-Sets the list of regex patterns to process.
-
-setWhiteList (value)
-Sets the list of entities to process.
-
-transform (dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write ()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-criteria
-
-
-entitiesConfidence
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-regex
-
-
-whiteList
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param .
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setCriteria ( s ) [source]
-Sets the tag representing the criteria used to filter the chunks. Possible values: (assertion|isIn|regex).
-
-Parameters
-
-s str Criteria used to filter the chunks
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setRegex ( value ) [source]
-Sets the list of regex patterns to process. The rest will be ignored.
-
-Parameters
-
-value list List of regex patterns to process
-
-
-
-
-
-
-
-
-setWhiteList ( value ) [source]
-Sets list of entities to process. The rest will be ignored.
-
-Parameters
-
-value list If defined, list of entities to process. The rest will be ignored.
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-Returns
-
-pyspark.sql.DataFrame transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionLogRegApproach.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionLogRegApproach.html
deleted file mode 100644
index 098e8468ed..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionLogRegApproach.html
+++ /dev/null
@@ -1,1169 +0,0 @@
-
-
-
-
-
-
-
-
sparknlp_jsl.annotator.AssertionLogRegApproach — Spark NLP 3.3.0 documentation
-
-
sparknlp_jsl.annotator.AssertionLogRegApproach
-
-
-class sparknlp_jsl.annotator. AssertionLogRegApproach [source]
-Bases: sparknlp.common.AnnotatorApproach
-Trains an Assertion status algorithm using a logistic regression model.
-The inputs, excluding the label, can be prepared with for example:
-- a SentenceDetector ,
-- a Chunker ,
-- a WordEmbeddingsModel .
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, CHUNK, WORD_EMBEDDINGS
-ASSERTION
-
-
-
-
-Parameters
-
-label Column with label per each token
-
-maxIter Max number of iterations for algorithm
-
-regParam Regularization parameter
-
-eNetParam Elastic net parameter
-
-beforeParam Length of the context before the target
-
-afterParam Length of the context after the target
-
-startCol Column that contains the token number for the start of the target
-
-externalFeatures Additional dictionary paths to use as features
-
-endCol Column that contains the token number for the end of the target
-
-nerCol Column with NER type annotation output, use either nerCol or startCol and endCol
-
-targetNerLabels List of NER labels to mark as target for assertion, must match NER output
-
-
-
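-A hedged sketch of the optimization-related setters (setMaxIter, setReg and setEnet from the Methods table below; the values are illustrative):
-
->>> assertion = AssertionLogRegApproach () \
-... . setMaxIter ( 26 ) \
-... . setReg ( 0.01 ) \
-... . setEnet ( 0.9 )
-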
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.annotator import *
->>> from sparknlp_jsl.annotator import *
->>> from sparknlp.training import *
->>> from pyspark.ml import Pipeline
->>> document_assembler = DocumentAssembler () \
-... . setInputCol ( "text" ) \
-... . setOutputCol ( "document" )
-...
->>> sentence_detector = SentenceDetector () \
-... . setInputCol ( "document" ) \
-... . setOutputCol ( "sentence" )
-...
->>> tokenizer = Tokenizer () \
-... . setInputCols ([ "sentence" ]) \
-... . setOutputCol ( "token" )
-...
->>> glove = WordEmbeddingsModel . pretrained ( "embeddings_clinical" , "en" , "clinical/models" ) \
-... . setInputCols ([ "sentence" , "token" ]) \
-... . setOutputCol ( "word_embeddings" )
-...
->>> chunk = Chunker () \
-... . setInputCols ([ "sentence" ]) \
-... . setChunkCol ( "chunk" ) \
-... . setOutputCol ( "chunk" )
-...
-Then the AssertionLogRegApproach model is defined. Label column is needed in the dataset for training.
->>> assertion = AssertionLogRegApproach () \
-... . setLabelCol ( "label" ) \
-... . setInputCols ([ "document" , "chunk" , "word_embeddings" ]) \
-... . setOutputCol ( "assertion" ) \
-... . setReg ( 0.01 ) \
-... . setBefore ( 11 ) \
-... . setAfter ( 13 ) \
-... . setStartCol ( "start" ) \
-... . setEndCol ( "end" )
-...
->>> assertionPipeline = Pipeline ( stages = [
-... document_assembler ,
-... sentence_detector ,
-... tokenizer ,
-... glove ,
-... chunk ,
-... assertion
-...])
-
-
->>> assertionModel = assertionPipeline . fit ( dataset )
-
-
-Methods
-
-__init__ ()
-
-clear (param)
-Clears a param from the param map if it has been explicitly set.
-
-copy ([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam (param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams ()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap ([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-fit (dataset[, params])
-Fits a model to the input dataset with optional parameters.
-
-fitMultiple (dataset, paramMaps)
-Fits a model to the input dataset for each param map in paramMaps .
-
-getInputCols ()
-Gets current column names of input annotations.
-
-getLazyAnnotator ()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault (param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol ()
-Gets output column name of annotations.
-
-getParam (paramName)
-Gets a param by its name.
-
-getParamValue (paramName)
-Gets the value of a parameter.
-
-hasDefault (param)
-Checks whether a param has a default value.
-
-hasParam (paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined (param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet (param)
-Checks whether a param is explicitly set by user.
-
-load (path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read ()
-Returns an MLReader instance for this class.
-
-save (path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set (param, value)
-Sets a parameter in the embedded param map.
-
-setAfter (after)
-
-setBefore (before)
-
-setEndCol (e)
-
-setEnet (enet)
-
-setInputCols (*value)
-Sets column names of input annotations.
-
-setLabelCol (label)
-
-setLazyAnnotator (value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMaxIter (maxiter)
-
-setNerCol (n)
-
-setOutputCol (value)
-Sets output column name of annotations.
-
-setParamValue (paramName)
-Sets the value of a parameter.
-
-setReg (lamda)
-
-setStartCol (s)
-
-setTargetNerLabels (v)
-
-write ()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-afterParam
-
-
-beforeParam
-
-
-eNetParam
-
-
-endCol
-
-
-getter_attrs
-
-
-inputCols
-
-
-label
-
-
-lazyAnnotator
-
-
-maxIter
-
-
-nerCol
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-regParam
-
-
-startCol
-
-
-targetNerLabels
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-fit ( dataset , params = None )
-Fits a model to the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame input dataset.
-
-params dict or list or tuple, optional an optional param map that overrides embedded params. If a list/tuple of
-param maps is given, this calls fit on each param map and returns a list of
-models.
-
-Returns
-
-Transformer or a list of Transformer fitted model(s)
-
-
-
-
-
-
-
-
-fitMultiple ( dataset , paramMaps )
-Fits a model to the input dataset for each param map in paramMaps .
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame input dataset.
-
-paramMaps collections.abc.Sequence A Sequence of param maps.
-
-Returns
-
-_FitMultipleIterator A thread safe iterable which contains one model for each param map. Each
-call to next(modelIterator) will return (index, model) where model was fit
-using paramMaps[index] . index values may not be sequential.
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param .
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionLogRegModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionLogRegModel.html
deleted file mode 100644
index c0dc42da51..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AssertionLogRegModel.html
+++ /dev/null
@@ -1,1136 +0,0 @@
-
-
-
-
-
-
-
-
sparknlp_jsl.annotator.AssertionLogRegModel — Spark NLP 3.3.0 documentation
-
-
sparknlp_jsl.annotator.AssertionLogRegModel
-
-
-class sparknlp_jsl.annotator. AssertionLogRegModel ( classname = 'com.johnsnowlabs.nlp.annotators.assertion.logreg.AssertionLogRegModel' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel , sparknlp.common.HasStorageRef
-
-This is the main class in the AssertionLogReg family. Logistic regression is used to extract Assertion Status from extracted entities and text. AssertionLogRegModel requires DOCUMENT, CHUNK and WORD_EMBEDDINGS type
-annotator inputs, which can be obtained, excluding the label, with for example:
-
-a SentenceDetector ,
-a Chunker ,
-a WordEmbeddingsModel .
-
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, CHUNK, WORD_EMBEDDINGS
-ASSERTION
-
-
-
-
-Parameters
-
-beforeParam Length of the context before the target
-
-afterParam Length of the context after the target
-
-startCol Column that contains the token number for the start of the target
-
-endCol Column that contains the token number for the end of the target
-
-nerCol Column with NER type annotation output, use either nerCol or startCol and endCol
-
-targetNerLabels List of NER labels to mark as target for assertion, must match NER output
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.annotator import *
->>> from sparknlp_jsl.annotator import *
->>> from sparknlp.training import *
->>> from pyspark.ml import Pipeline
-
-
->>> document_assembler = DocumentAssembler () \
-... . setInputCol ( "text" ) \
-... . setOutputCol ( "document" )
-...
->>> sentence_detector = SentenceDetector () \
-... . setInputCol ( "document" ) \
-... . setOutputCol ( "sentence" )
-...
->>> tokenizer = Tokenizer () \
-... . setInputCols ([ "sentence" ]) \
-... . setOutputCol ( "token" )
-...
->>> embeddings = WordEmbeddingsModel . pretrained ( "embeddings_clinical" , "en" , "clinical/models" ) \
-... . setInputCols ([ "sentence" , "token" ]) \
-... . setOutputCol ( "word_embeddings" ) \
-... . setCaseSensitive ( False )
-...
->>> chunk = Chunker () \
-... . setInputCols ([ "sentence" ]) \
-... . setChunkCol ( "chunk" ) \
-... . setOutputCol ( "chunk" )
-...
-Then the pretrained AssertionLogRegModel is used.
->>> assertion = AssertionLogRegModel . pretrained () \
-... . setInputCols ([ "document" , "chunk" , "word_embeddings" ]) \
-... . setOutputCol ( "assertion" )
-...
->>> assertionPipeline = Pipeline ( stages = [
-... document_assembler ,
-... sentence_detector ,
-... tokenizer ,
-... embeddings ,
-... chunk ,
-... assertion
-... ])
-
-
->>> assertionModel = assertionPipeline . fit ( dataset )
->>> assertionPretrained = assertionModel . transform ( dataset )
-
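-The assertion results can then be inspected, for example:
-
->>> assertionPretrained . selectExpr ( "chunk.result" , "assertion.result" ) . show ( truncate = False )
-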
-
-Methods
-
-__init__ ([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear (param)
-Clears a param from the param map if it has been explicitly set.
-
-copy ([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam (param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams ()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap ([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols ()
-Gets current column names of input annotations.
-
-getLazyAnnotator ()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault (param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol ()
-Gets output column name of annotations.
-
-getParam (paramName)
-Gets a param by its name.
-
-getParamValue (paramName)
-Gets the value of a parameter.
-
-getStorageRef ()
-Gets unique reference name for identification.
-
-hasDefault (param)
-Checks whether a param has a default value.
-
-hasParam (paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined (param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet (param)
-Checks whether a param is explicitly set by user.
-
-load (path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-pretrained (name[, lang, remote_loc])
-
-read ()
-Returns an MLReader instance for this class.
-
-save (path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set (param, value)
-Sets a parameter in the embedded param map.
-
-setInputCols (*value)
-Sets column names of input annotations.
-
-setLazyAnnotator (value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol (value)
-Sets output column name of annotations.
-
-setParamValue (paramName)
-Sets the value of a parameter.
-
-setParams ()
-
-setStorageRef (value)
-Sets unique reference name for identification.
-
-transform (dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write ()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-afterParam
-
-
-beforeParam
-
-
-endCol
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-name
-
-
-nerCol
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-startCol
-
-
-storageRef
-
-
-targetNerLabels
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-getStorageRef ( )
-Gets unique reference name for identification.
-
-Returns
-
-str Unique reference name for identification
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param .
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setStorageRef ( value )
-Sets unique reference name for identification.
-
-Parameters
-
-value str Unique reference name for identification
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-Returns
-
-pyspark.sql.DataFrame transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AverageEmbeddings.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AverageEmbeddings.html
deleted file mode 100644
index 8cc5117cc2..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.AverageEmbeddings.html
+++ /dev/null
@@ -1,983 +0,0 @@
-
-
-
-
-
-
-
-
sparknlp_jsl.annotator.AverageEmbeddings — Spark NLP 3.3.0 documentation
-
-
sparknlp_jsl.annotator.AverageEmbeddings
-
-
-class sparknlp_jsl.annotator. AverageEmbeddings ( classname = 'com.johnsnowlabs.nlp.annotators.embeddings.AverageEmbeddings' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
-Methods
-
-__init__ ([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear (param)
-Clears a param from the param map if it has been explicitly set.
-
-copy ([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam (param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams ()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap ([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols ()
-Gets current column names of input annotations.
-
-getLazyAnnotator ()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault (param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol ()
-Gets output column name of annotations.
-
-getParam (paramName)
-Gets a param by its name.
-
-getParamValue (paramName)
-Gets the value of a parameter.
-
-hasDefault (param)
-Checks whether a param has a default value.
-
-hasParam (paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined (param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet (param)
-Checks whether a param is explicitly set by user.
-
-load (path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read ()
-Returns an MLReader instance for this class.
-
-save (path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set (param, value)
-Sets a parameter in the embedded param map.
-
-setInputCols (*value)
-Sets column names of input annotations.
-
-setLazyAnnotator (value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol (value)
-Sets output column name of annotations.
-
-setParamValue (paramName)
-Sets the value of a parameter.
-
-setParams ()
-
-transform (dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write ()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
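-For example (assuming the avgEmbeddings instance from the sketch above and a DataFrame df that already contains the configured input columns):
->>> output_df = avgEmbeddings.transform(df)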
-
-uid
-A unique id for the object.
-
-write()
-Returns an MLWriter instance for this ML instance.
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.BertSentenceChunkEmbeddings.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.BertSentenceChunkEmbeddings.html
deleted file mode 100644
index a93af7e1e3..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.BertSentenceChunkEmbeddings.html
+++ /dev/null
@@ -1,1351 +0,0 @@
-
sparknlp_jsl.annotator.BertSentenceChunkEmbeddings — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.BertSentenceChunkEmbeddings
-
-
-class sparknlp_jsl.annotator.BertSentenceChunkEmbeddings(classname='com.johnsnowlabs.nlp.embeddings.BertSentenceChunkEmbeddings', java_model=None) [source]
-Bases: sparknlp.annotator.BertSentenceEmbeddings
-BERT sentence embeddings for chunk annotations which take into account the context of the sentence the chunk appeared in. This is an extension of BertSentenceEmbeddings which combines the embedding of a chunk with the embedding of the surrounding sentence. For each input chunk annotation, it finds the corresponding sentence, computes the BERT sentence embedding of both the chunk and the sentence, and averages them. The resulting embeddings are useful in cases in which one needs a numerical representation of a text chunk which is sensitive to the context it appears in.
-
-
-
-
-
-
-Input Annotation types: DOCUMENT, CHUNK
-Output Annotation type: SENTENCE_EMBEDDINGS
-
-
-
-
-Parameters
-
-chunkWeight: Relative weight of chunk embeddings in comparison to sentence embeddings. The value should be between 0 and 1. The default is 0.5, which means the chunk and sentence embeddings are given equal weight.
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
-
-First extract the prerequisites for the NerDLModel:
->>> documentAssembler = DocumentAssembler() \
-...     .setInputCol("text") \
-...     .setOutputCol("document")
->>> sentence = SentenceDetector() \
-...     .setInputCols(["document"]) \
-...     .setOutputCol("sentence")
->>> tokenizer = Tokenizer() \
-...     .setInputCols(["sentence"]) \
-...     .setOutputCol("token")
->>> wordEmbeddings = WordEmbeddingsModel.pretrained() \
-...     .setInputCols(["sentence", "token"]) \
-...     .setOutputCol("bert")
->>> nerTagger = MedicalNerDLModel.pretrained() \
-...     .setInputCols(["sentence", "token", "bert"]) \
-...     .setOutputCol("ner")
->>> nerConverter = NerConverter() \
-...     .setInputCols(["sentence", "token", "ner"]) \
-...     .setOutputCol("ner_chunk")
->>> sentenceChunkEmbeddings = BertSentenceChunkEmbeddings.pretrained("sbluebert_base_uncased_mli", "en", "clinical/models") \
-...     .setInputCols(["sentence", "ner_chunk"]) \
-...     .setOutputCol("sentence_chunk_embeddings")
->>> pipeline = Pipeline().setStages([
-...     documentAssembler,
-...     sentence,
-...     tokenizer,
-...     wordEmbeddings,
-...     nerTagger,
-...     nerConverter,
-...     sentenceChunkEmbeddings
-... ])
->>> data = spark.createDataFrame([["Her Diabetes has become type 2 in the last year with her Diabetes.He complains of swelling in his right forearm."]]).toDF("text")
->>> result = pipeline.fit(data).transform(data)
->>> result \
-...     .selectExpr("explode(sentence_chunk_embeddings) AS s") \
-...     .selectExpr("s.result", "slice(s.embeddings, 1, 5) AS averageEmbedding") \
-...     .show(truncate=False)
-+-----------------------------+-----------------------------------------------------------------+
-| result| averageEmbedding|
-+-----------------------------+-----------------------------------------------------------------+
-|Her Diabetes |[-0.31995273, -0.04710883, -0.28973156, -0.1294758, 0.12481072] |
-|type 2 |[-0.027161136, -0.24613449, -0.0949309, 0.1825444, -0.2252143] |
-|her Diabetes |[-0.31995273, -0.04710883, -0.28973156, -0.1294758, 0.12481072] |
-|swelling in his right forearm|[-0.45139068, 0.12400375, -0.0075617577, -0.90806055, 0.12871636]|
-+-----------------------------+-----------------------------------------------------------------+
-
-
-Methods
-
-__init__([classname, java_model]): Initialize this instance with a Java model object.
-clear(param): Clears a param from the param map if it has been explicitly set.
-copy([extra]): Creates a copy of this instance with the same uid and some extra params.
-explainParam(param): Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-explainParams(): Returns the documentation of all params with their optional default values and user-supplied values.
-extractParamMap([extra]): Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-getBatchSize(): Gets current batch size.
-getCaseSensitive(): Gets whether to ignore case in tokens for embeddings matching.
-getDimension(): Gets embeddings dimension.
-getInputCols(): Gets current column names of input annotations.
-getLazyAnnotator(): Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-getOrDefault(param): Gets the value of a param in the user-supplied param map or its default value.
-getOutputCol(): Gets output column name of annotations.
-getParam(paramName): Gets a param by its name.
-getParamValue(paramName): Gets the value of a parameter.
-getStorageRef(): Gets unique reference name for identification.
-hasDefault(param): Checks whether a param has a default value.
-hasParam(paramName): Tests whether this instance contains a param with a given (string) name.
-isDefined(param): Checks whether a param is explicitly set by user or has a default value.
-isSet(param): Checks whether a param is explicitly set by user.
-load(path): Reads an ML instance from the input path, a shortcut of read().load(path).
-loadSavedModel(folder, spark_session): Loads a locally saved model.
-pretrained([name, lang, remote_loc]): Downloads and loads a pretrained model.
-read(): Returns an MLReader instance for this class.
-save(path): Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-set(param, value): Sets a parameter in the embedded param map.
-setBatchSize(v): Sets batch size.
-setCaseSensitive(value): Sets whether to ignore case in tokens for embeddings matching.
-setChunkWeight(value): Sets the relative weight of chunk embeddings in comparison to sentence embeddings. The value should be between 0 and 1.
-setConfigProtoBytes(b): Sets configProto from tensorflow, serialized into byte array.
-setDimension(value): Sets embeddings dimension.
-setInputCols(*value): Sets column names of input annotations.
-setIsLong(value): Sets whether to use Long type instead of Int type for inputs buffer.
-setLazyAnnotator(value): Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-setMaxSentenceLength(value): Sets max sentence length to process.
-setOutputCol(value): Sets output column name of annotations.
-setParamValue(paramName): Sets the value of a parameter.
-setParams()
-setStorageRef(value): Sets unique reference name for identification.
-transform(dataset[, params]): Transforms the input dataset with optional parameters.
-write(): Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-batchSize
-caseSensitive
-chunkWeight
-configProtoBytes
-dimension
-getter_attrs
-inputCols
-isLong
-lazyAnnotator
-maxSentenceLength
-name
-outputCol
-params: Returns all params ordered by name.
-storageRef
-
-clear(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy(extra=None)
-Creates a copy of this instance with the same uid and some extra params. This implementation first calls Params.copy and then makes a copy of the companion Java pipeline component with extra params, so both the Python wrapper and the Java pipeline component get copied.
-Parameters: extra (dict, optional): Extra parameters to copy to the new instance.
-Returns: JavaParams: Copy of this instance.
-
-explainParam(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams()
-Returns the documentation of all params with their optional default values and user-supplied values.
-
-extractParamMap(extra=None)
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-Parameters: extra (dict, optional): Extra param values.
-Returns: dict: Merged param map.
-
-getBatchSize()
-Gets current batch size.
-Returns: int: Current batch size.
-
-getCaseSensitive()
-Gets whether to ignore case in tokens for embeddings matching.
-Returns: bool: Whether to ignore case in tokens for embeddings matching.
-
-getDimension()
-Gets embeddings dimension.
-
-getInputCols()
-Gets current column names of input annotations.
-
-getLazyAnnotator()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault(param)
-Gets the value of a param in the user-supplied param map or its default value. Raises an error if neither is set.
-
-getOutputCol()
-Gets output column name of annotations.
-
-getParam(paramName)
-Gets a param by its name.
-
-getParamValue(paramName)
-Gets the value of a parameter.
-Parameters: paramName (str): Name of the parameter.
-
-getStorageRef()
-Gets unique reference name for identification.
-Returns: str: Unique reference name for identification.
-
-hasDefault(param)
-Checks whether a param has a default value.
-
-hasParam(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet(param)
-Checks whether a param is explicitly set by user.
-
-classmethod load(path)
-Reads an ML instance from the input path, a shortcut of read().load(path).
-
-static loadSavedModel(folder, spark_session)
-Loads a locally saved model.
-Parameters:
-folder (str): Folder of the saved model.
-spark_session (pyspark.sql.SparkSession): The current SparkSession.
-Returns: BertSentenceEmbeddings: The restored model.
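-For example (the folder path is a placeholder, not taken from this page):
->>> embeddings = BertSentenceChunkEmbeddings.loadSavedModel("/models/bert_sentence_chunk", spark)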
-
-property params
-Returns all params ordered by name. The default implementation uses dir() to get all attributes of type Param.
-
-static pretrained(name='sent_small_bert_L2_768', lang='en', remote_loc=None) [source]
-Downloads and loads a pretrained model.
-Parameters:
-name (str, optional): Name of the pretrained model, by default "sent_small_bert_L2_768".
-lang (str, optional): Language of the pretrained model, by default "en".
-remote_loc (str, optional): Optional remote address of the resource, by default None. Will use Spark NLP's repositories otherwise.
-Returns: BertSentenceEmbeddings: The restored model.
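-For instance, the clinical model used in the example above can be loaded with:
->>> embeddings = BertSentenceChunkEmbeddings.pretrained("sbluebert_base_uncased_mli", "en", "clinical/models")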
-
-classmethod read()
-Returns an MLReader instance for this class.
-
-save(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set(param, value)
-Sets a parameter in the embedded param map.
-
-setBatchSize(v)
-Sets batch size.
-Parameters: v (int): Batch size.
-
-setCaseSensitive(value)
-Sets whether to ignore case in tokens for embeddings matching.
-Parameters: value (bool): Whether to ignore case in tokens for embeddings matching.
-
-setChunkWeight(value) [source]
-Sets the relative weight of chunk embeddings in comparison to sentence embeddings. The value should be between 0 and 1. The default is 0.5, which means the chunk and sentence embeddings are given equal weight.
-Parameters: value (float): Relative weight of chunk embeddings, between 0 and 1.
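-For example, to weight the chunk embedding three times as heavily as its surrounding sentence (the value 0.75 is purely illustrative):
->>> embeddings.setChunkWeight(0.75)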
-
-setConfigProtoBytes(b)
-Sets configProto from tensorflow, serialized into byte array.
-Parameters: b (List[str]): ConfigProto from tensorflow, serialized into byte array.
-
-setDimension(value)
-Sets embeddings dimension.
-Parameters: value (int): Embeddings dimension.
-
-setInputCols(*value)
-Sets column names of input annotations.
-Parameters: *value (str): Input columns for the annotator.
-
-setIsLong(value)
-Sets whether to use Long type instead of Int type for inputs buffer. Some Bert models require Long instead of Int.
-Parameters: value (bool): Whether to use Long type instead of Int type for inputs buffer.
-
-setLazyAnnotator(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-Parameters: value (bool): Whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMaxSentenceLength(value)
-Sets max sentence length to process.
-Parameters: value (int): Max sentence length to process.
-
-setOutputCol(value)
-Sets output column name of annotations.
-Parameters: value (str): Name of output column.
-
-setParamValue(paramName)
-Sets the value of a parameter.
-Parameters: paramName (str): Name of the parameter.
-
-setStorageRef(value)
-Sets unique reference name for identification.
-Parameters: value (str): Unique reference name for identification.
-
-transform(dataset, params=None)
-Transforms the input dataset with optional parameters.
-Parameters:
-dataset (pyspark.sql.DataFrame): Input dataset.
-params (dict, optional): An optional param map that overrides embedded params.
-Returns: pyspark.sql.DataFrame: Transformed dataset.
-
-uid
-A unique id for the object.
-
-write()
-Returns an MLWriter instance for this ML instance.
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.Chunk2Token.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.Chunk2Token.html
deleted file mode 100644
index a94e94ab78..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.Chunk2Token.html
+++ /dev/null
@@ -1,983 +0,0 @@
-
sparknlp_jsl.annotator.Chunk2Token — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.Chunk2Token
-
-
-class sparknlp_jsl.annotator.Chunk2Token(classname='com.johnsnowlabs.nlp.annotators.Chunk2Token', java_model=None) [source]
-Bases: sparknlp.common.AnnotatorModel
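-This page ships without a description or example. As the class name suggests, the annotator maps CHUNK annotations into token-style output; the sketch below shows minimal wiring, with column names assumed for illustration only:
->>> from sparknlp_jsl.annotator import Chunk2Token
->>> chunk2Token = Chunk2Token() \
-...     .setInputCols(["ner_chunk"]) \
-...     .setOutputCol("chunk_token")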
-Methods
-
-__init__([classname, java_model]): Initialize this instance with a Java model object.
-clear(param): Clears a param from the param map if it has been explicitly set.
-copy([extra]): Creates a copy of this instance with the same uid and some extra params.
-explainParam(param): Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-explainParams(): Returns the documentation of all params with their optional default values and user-supplied values.
-extractParamMap([extra]): Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-getInputCols(): Gets current column names of input annotations.
-getLazyAnnotator(): Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-getOrDefault(param): Gets the value of a param in the user-supplied param map or its default value.
-getOutputCol(): Gets output column name of annotations.
-getParam(paramName): Gets a param by its name.
-getParamValue(paramName): Gets the value of a parameter.
-hasDefault(param): Checks whether a param has a default value.
-hasParam(paramName): Tests whether this instance contains a param with a given (string) name.
-isDefined(param): Checks whether a param is explicitly set by user or has a default value.
-isSet(param): Checks whether a param is explicitly set by user.
-load(path): Reads an ML instance from the input path, a shortcut of read().load(path).
-read(): Returns an MLReader instance for this class.
-save(path): Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-set(param, value): Sets a parameter in the embedded param map.
-setInputCols(*value): Sets column names of input annotations.
-setLazyAnnotator(value): Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-setOutputCol(value): Sets output column name of annotations.
-setParamValue(paramName): Sets the value of a parameter.
-setParams()
-transform(dataset[, params]): Transforms the input dataset with optional parameters.
-write(): Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-getter_attrs
-inputCols
-lazyAnnotator
-name
-outputCol
-params: Returns all params ordered by name.
-
-clear(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy(extra=None)
-Creates a copy of this instance with the same uid and some extra params. This implementation first calls Params.copy and then makes a copy of the companion Java pipeline component with extra params, so both the Python wrapper and the Java pipeline component get copied.
-Parameters: extra (dict, optional): Extra parameters to copy to the new instance.
-Returns: JavaParams: Copy of this instance.
-
-explainParam(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams()
-Returns the documentation of all params with their optional default values and user-supplied values.
-
-extractParamMap(extra=None)
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-Parameters: extra (dict, optional): Extra param values.
-Returns: dict: Merged param map.
-
-getInputCols()
-Gets current column names of input annotations.
-
-getLazyAnnotator()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault(param)
-Gets the value of a param in the user-supplied param map or its default value. Raises an error if neither is set.
-
-getOutputCol()
-Gets output column name of annotations.
-
-getParam(paramName)
-Gets a param by its name.
-
-getParamValue(paramName)
-Gets the value of a parameter.
-Parameters: paramName (str): Name of the parameter.
-
-hasDefault(param)
-Checks whether a param has a default value.
-
-hasParam(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet(param)
-Checks whether a param is explicitly set by user.
-
-classmethod load(path)
-Reads an ML instance from the input path, a shortcut of read().load(path).
-
-property params
-Returns all params ordered by name. The default implementation uses dir() to get all attributes of type Param.
-
-classmethod read()
-Returns an MLReader instance for this class.
-
-save(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set(param, value)
-Sets a parameter in the embedded param map.
-
-setInputCols(*value)
-Sets column names of input annotations.
-Parameters: *value (str): Input columns for the annotator.
-
-setLazyAnnotator(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-Parameters: value (bool): Whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol(value)
-Sets output column name of annotations.
-Parameters: value (str): Name of output column.
-
-setParamValue(paramName)
-Sets the value of a parameter.
-Parameters: paramName (str): Name of the parameter.
-
-transform(dataset, params=None)
-Transforms the input dataset with optional parameters.
-Parameters:
-dataset (pyspark.sql.DataFrame): Input dataset.
-params (dict, optional): An optional param map that overrides embedded params.
-Returns: pyspark.sql.DataFrame: Transformed dataset.
-
-uid
-A unique id for the object.
-
-write()
-Returns an MLWriter instance for this ML instance.
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkConverter.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkConverter.html
deleted file mode 100644
index a1233a362d..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkConverter.html
+++ /dev/null
@@ -1,1020 +0,0 @@
-
sparknlp_jsl.annotator.ChunkConverter — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.ChunkConverter
-
-
-class sparknlp_jsl.annotator.ChunkConverter(classname='com.johnsnowlabs.nlp.annotators.chunker.ChunkConverter', java_model=None) [source]
-Bases: sparknlp.common.AnnotatorModel
-Converts chunks from a RegexMatcher into chunks with an entity in the metadata, using the identifier or field as the entity.
-Examples
-
-
-
-
-
-
-Input Annotation types: CHUNK
-Output Annotation type: CHUNK
-
-
-
->>> test_data = spark.createDataFrame([
-...     (1, "My first sentence with the first rule. This is my second sentence with ceremonies rule."),
-... ]).toDF("id", "text")
->>> document_assembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
->>> sentence_detector = SentenceDetector().setInputCols(["document"]).setOutputCol("sentence")
->>> regex_matcher = RegexMatcher() \
-...     .setInputCols("sentence") \
-...     .setOutputCol("regex") \
-...     .setExternalRules(path="../src/test/resources/regex-matcher/rules.txt", delimiter=",")
->>> chunkConverter = ChunkConverter().setInputCols("regex").setOutputCol("chunk")
->>> pipeline = Pipeline(stages=[document_assembler, sentence_detector, regex_matcher, chunkConverter])
->>> model = pipeline.fit(test_data)
->>> outdf = model.transform(test_data)
-+------------------------------------------------------------------------------------------------+
-|col |
-+------------------------------------------------------------------------------------------------+
-|[chunk, 23, 31, the first, [identifier -> NAME, sentence -> 0, chunk -> 0, entity -> NAME], []] |
-|[chunk, 71, 80, ceremonies, [identifier -> NAME, sentence -> 1, chunk -> 0, entity -> NAME], []]|
-+------------------------------------------------------------------------------------------------+
-
-
-Methods
-
-__init__([classname, java_model]): Initialize this instance with a Java model object.
-clear(param): Clears a param from the param map if it has been explicitly set.
-copy([extra]): Creates a copy of this instance with the same uid and some extra params.
-explainParam(param): Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-explainParams(): Returns the documentation of all params with their optional default values and user-supplied values.
-extractParamMap([extra]): Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-getInputCols(): Gets current column names of input annotations.
-getLazyAnnotator(): Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-getOrDefault(param): Gets the value of a param in the user-supplied param map or its default value.
-getOutputCol(): Gets output column name of annotations.
-getParam(paramName): Gets a param by its name.
-getParamValue(paramName): Gets the value of a parameter.
-hasDefault(param): Checks whether a param has a default value.
-hasParam(paramName): Tests whether this instance contains a param with a given (string) name.
-isDefined(param): Checks whether a param is explicitly set by user or has a default value.
-isSet(param): Checks whether a param is explicitly set by user.
-load(path): Reads an ML instance from the input path, a shortcut of read().load(path).
-read(): Returns an MLReader instance for this class.
-save(path): Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-set(param, value): Sets a parameter in the embedded param map.
-setInputCols(*value): Sets column names of input annotations.
-setLazyAnnotator(value): Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-setOutputCol(value): Sets output column name of annotations.
-setParamValue(paramName): Sets the value of a parameter.
-setParams()
-transform(dataset[, params]): Transforms the input dataset with optional parameters.
-write(): Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-getter_attrs
-inputCols
-lazyAnnotator
-name
-outputCol
-params: Returns all params ordered by name.
-
-clear(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy(extra=None)
-Creates a copy of this instance with the same uid and some extra params. This implementation first calls Params.copy and then makes a copy of the companion Java pipeline component with extra params, so both the Python wrapper and the Java pipeline component get copied.
-Parameters: extra (dict, optional): Extra parameters to copy to the new instance.
-Returns: JavaParams: Copy of this instance.
-
-explainParam(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams()
-Returns the documentation of all params with their optional default values and user-supplied values.
-
-extractParamMap(extra=None)
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-Parameters: extra (dict, optional): Extra param values.
-Returns: dict: Merged param map.
-
-getInputCols()
-Gets current column names of input annotations.
-
-getLazyAnnotator()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault(param)
-Gets the value of a param in the user-supplied param map or its default value. Raises an error if neither is set.
-
-getOutputCol()
-Gets output column name of annotations.
-
-getParam(paramName)
-Gets a param by its name.
-
-getParamValue(paramName)
-Gets the value of a parameter.
-Parameters: paramName (str): Name of the parameter.
-
-hasDefault(param)
-Checks whether a param has a default value.
-
-hasParam(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet(param)
-Checks whether a param is explicitly set by user.
-
-classmethod load(path)
-Reads an ML instance from the input path, a shortcut of read().load(path).
-
-property params
-Returns all params ordered by name. The default implementation uses dir() to get all attributes of type Param.
-
-classmethod read()
-Returns an MLReader instance for this class.
-
-save(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set(param, value)
-Sets a parameter in the embedded param map.
-
-setInputCols(*value)
-Sets column names of input annotations.
-Parameters: *value (str): Input columns for the annotator.
-
-setLazyAnnotator(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-Parameters: value (bool): Whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol(value)
-Sets output column name of annotations.
-Parameters: value (str): Name of output column.
-
-setParamValue(paramName)
-Sets the value of a parameter.
-Parameters: paramName (str): Name of the parameter.
-
-transform(dataset, params=None)
-Transforms the input dataset with optional parameters.
-Parameters:
-dataset (pyspark.sql.DataFrame): Input dataset.
-params (dict, optional): An optional param map that overrides embedded params.
-Returns: pyspark.sql.DataFrame: Transformed dataset.
-
-uid
-A unique id for the object.
-
-write()
-Returns an MLWriter instance for this ML instance.
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkFilterer.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkFilterer.html
deleted file mode 100644
index b749d0cce8..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkFilterer.html
+++ /dev/null
@@ -1,1097 +0,0 @@
-
sparknlp_jsl.annotator.ChunkFilterer — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.ChunkFilterer
-
-
-class sparknlp_jsl.annotator.ChunkFilterer(classname='com.johnsnowlabs.nlp.annotators.chunker.ChunkFilterer', java_model=None) [source]
-Bases: sparknlp.common.AnnotatorModel
-
-Model that filters entities coming from CHUNK annotations. Filters can be set via a white list of terms or a regular expression. White-list criteria is enabled by default; to use regex, criteria has to be set to regex. This model is trained using the ChunkFiltererApproach and embeds the list of (entity, confidenceThreshold) pairs.
-
-
-
-
-
-
-
-
-Input Annotation types: DOCUMENT, CHUNK, ASSERTION
-Output Annotation type: CHUNK
-
-
-
-
-Parameters
-
-whiteList: If defined, list of entities to process. The rest will be ignored.
-regex: If defined, list of regular expressions to process. The rest will be ignored.
-criteria: Tag representing the criteria used to filter the chunks. Possible values: assertion, isIn (filter by the chunk), regex (filter using a regex).
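-For example, a regex-based filter could be configured as follows (a sketch; the pattern and column names are illustrative, not from this page):
->>> regexFilter = ChunkFilterer() \
-...     .setInputCols(["sentence", "chunk"]) \
-...     .setOutputCol("filtered") \
-...     .setCriteria("regex") \
-...     .setRegex(["gastro.*"])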
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> data = spark.createDataFrame([["Has a past history of gastroenteritis and stomach pain, however patient ..."]]).toDF("text")
->>> docAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
->>> sentenceDetector = SentenceDetector().setInputCols(["document"]).setOutputCol("sentence")
->>> tokenizer = Tokenizer().setInputCols(["sentence"]).setOutputCol("token")
->>> posTagger = PerceptronModel.pretrained() \
-...     .setInputCols(["sentence", "token"]) \
-...     .setOutputCol("pos")
->>> chunker = Chunker() \
-...     .setInputCols(["pos", "sentence"]) \
-...     .setOutputCol("chunk") \
-...     .setRegexParsers(["(<NN>)+"])
->>> chunkerFilter = ChunkFilterer() \
-...     .setInputCols(["sentence", "chunk"]) \
-...     .setOutputCol("filtered") \
-...     .setCriteria("isin") \
-...     .setWhiteList(["gastroenteritis"])
->>> pipeline = Pipeline(stages=[
-...     docAssembler,
-...     sentenceDetector,
-...     tokenizer,
-...     posTagger,
-...     chunker,
-...     chunkerFilter])
->>> result = pipeline.fit(data).transform(data)
->>> result.selectExpr("explode(chunk)").show(truncate=False)
-+---------------------------------------------------------------------------------+
-|col |
-+---------------------------------------------------------------------------------+
-|{chunk, 11, 17, history, {sentence -> 0, chunk -> 0}, []} |
-|{chunk, 22, 36, gastroenteritis, {sentence -> 0, chunk -> 1}, []} |
-|{chunk, 42, 53, stomach pain, {sentence -> 0, chunk -> 2}, []} |
-|{chunk, 64, 70, patient, {sentence -> 0, chunk -> 3}, []} |
-|{chunk, 81, 110, stomach pain now.We don't care, {sentence -> 0, chunk -> 4}, []}|
-|{chunk, 118, 132, gastroenteritis, {sentence -> 0, chunk -> 5}, []} |
-+---------------------------------------------------------------------------------+
-
-
->>> result.selectExpr("explode(filtered)").show(truncate=False)
-+-------------------------------------------------------------------+
-|col |
-+-------------------------------------------------------------------+
-|{chunk, 22, 36, gastroenteritis, {sentence -> 0, chunk -> 1}, []} |
-|{chunk, 118, 132, gastroenteritis, {sentence -> 0, chunk -> 5}, []}|
-+-------------------------------------------------------------------+
-
-
-Methods
-
-__init__([classname, java_model]): Initialize this instance with a Java model object.
-clear(param): Clears a param from the param map if it has been explicitly set.
-copy([extra]): Creates a copy of this instance with the same uid and some extra params.
-explainParam(param): Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-explainParams(): Returns the documentation of all params with their optional default values and user-supplied values.
-extractParamMap([extra]): Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-getInputCols(): Gets current column names of input annotations.
-getLazyAnnotator(): Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-getOrDefault(param): Gets the value of a param in the user-supplied param map or its default value.
-getOutputCol(): Gets output column name of annotations.
-getParam(paramName): Gets a param by its name.
-getParamValue(paramName): Gets the value of a parameter.
-hasDefault(param): Checks whether a param has a default value.
-hasParam(paramName): Tests whether this instance contains a param with a given (string) name.
-isDefined(param): Checks whether a param is explicitly set by user or has a default value.
-isSet(param): Checks whether a param is explicitly set by user.
-load(path): Reads an ML instance from the input path, a shortcut of read().load(path).
-read(): Returns an MLReader instance for this class.
-save(path): Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-set(param, value): Sets a parameter in the embedded param map.
-setCriteria(s)
-setFilterEntity(s)
-setInputCols(*value): Sets column names of input annotations.
-setLazyAnnotator(value): Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-setOutputCol(value): Sets output column name of annotations.
-setParamValue(paramName): Sets the value of a parameter.
-setParams()
-setRegex(value)
-setWhiteList(value)
-transform(dataset[, params]): Transforms the input dataset with optional parameters.
-write(): Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-criteria
-filterValue
-getter_attrs
-inputCols
-lazyAnnotator
-name
-outputCol
-params: Returns all params ordered by name.
-regex
-whiteList
-
-
-clear(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy(extra=None)
-Creates a copy of this instance with the same uid and some extra params. This implementation first calls Params.copy and then makes a copy of the companion Java pipeline component with extra params, so both the Python wrapper and the Java pipeline component get copied.
-Parameters: extra (dict, optional): Extra parameters to copy to the new instance.
-Returns: JavaParams: Copy of this instance.
-
-explainParam(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams()
-Returns the documentation of all params with their optional default values and user-supplied values.
-
-extractParamMap(extra=None)
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-Parameters: extra (dict, optional): Extra param values.
-Returns: dict: Merged param map.
-
-getInputCols()
-Gets current column names of input annotations.
-
-getLazyAnnotator()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault(param)
-Gets the value of a param in the user-supplied param map or its default value. Raises an error if neither is set.
-
-getOutputCol()
-Gets output column name of annotations.
-
-getParam(paramName)
-Gets a param by its name.
-
-getParamValue(paramName)
-Gets the value of a parameter.
-Parameters: paramName (str): Name of the parameter.
-
-hasDefault(param)
-Checks whether a param has a default value.
-
-hasParam(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet(param)
-Checks whether a param is explicitly set by user.
-
-classmethod load(path)
-Reads an ML instance from the input path, a shortcut of read().load(path).
-
-property params
-Returns all params ordered by name. The default implementation uses dir() to get all attributes of type Param.
-
-classmethod read()
-Returns an MLReader instance for this class.
-
-save(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set(param, value)
-Sets a parameter in the embedded param map.
-
-setInputCols(*value)
-Sets column names of input annotations.
-Parameters: *value (str): Input columns for the annotator.
-
-setLazyAnnotator(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-Parameters: value (bool): Whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol(value)
-Sets output column name of annotations.
-Parameters: value (str): Name of output column.
-
-setParamValue(paramName)
-Sets the value of a parameter.
-Parameters: paramName (str): Name of the parameter.
-
-transform(dataset, params=None)
-Transforms the input dataset with optional parameters.
-Parameters:
-dataset (pyspark.sql.DataFrame): Input dataset.
-params (dict, optional): An optional param map that overrides embedded params.
-Returns: pyspark.sql.DataFrame: Transformed dataset.
-
-uid
-A unique id for the object.
-
-write()
-Returns an MLWriter instance for this ML instance.
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkFiltererApproach.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkFiltererApproach.html
deleted file mode 100644
index 9fb32677d1..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkFiltererApproach.html
+++ /dev/null
@@ -1,1189 +0,0 @@
-
sparknlp_jsl.annotator.ChunkFiltererApproach — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.ChunkFiltererApproach
-
-
-class sparknlp_jsl.annotator.ChunkFiltererApproach(classname='com.johnsnowlabs.nlp.annotators.chunker.ChunkFiltererApproach') [source]
-Bases: sparknlp.common.AnnotatorApproach
-
-Trains a model that filters entities coming from CHUNK annotations. Filters can be set via a white list of terms or a regular expression. White-list criteria is enabled by default; to use regex, criteria has to be set to regex.
-
-
-
-
-
-
-
-
-Input Annotation types: DOCUMENT, CHUNK, ASSERTION
-Output Annotation type: CHUNK
-
-
-
-
-Parameters
-
-whiteList: If defined, list of entities to process. The rest will be ignored.
-regex: If defined, list of regular expressions to process. The rest will be ignored.
-criteria: Tag representing the criteria used to filter the chunks. Possible values: assertion, isIn (filter by the chunk), regex (filter using a regex).
-entitiesConfidence: Path to a CSV with (entity, confidenceThreshold) pairs. Filters the chunks with entities whose confidence is lower than the confidence threshold.
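-A sketch of the confidence-threshold workflow (the file name, CSV contents, and column names are illustrative, not taken from this page): the resource is a CSV of entity,confidenceThreshold rows, e.g. PROBLEM,0.8, which is attached before fitting:
->>> approach = ChunkFiltererApproach() \
-...     .setInputCols(["sentence", "chunk"]) \
-...     .setOutputCol("filtered") \
-...     .setEntitiesConfidenceResource("entities_confidence.csv")
->>> chunk_filter_model = approach.fit(data)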
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> data = spark.createDataFrame([["Has a past history of gastroenteritis and stomach pain, however patient ..."]]).toDF("text")
->>> docAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
->>> sentenceDetector = SentenceDetector().setInputCols(["document"]).setOutputCol("sentence")
->>> tokenizer = Tokenizer().setInputCols(["sentence"]).setOutputCol("token")
->>> posTagger = PerceptronModel.pretrained() \
-...     .setInputCols(["sentence", "token"]) \
-...     .setOutputCol("pos")
->>> chunker = Chunker() \
-...     .setInputCols(["pos", "sentence"]) \
-...     .setOutputCol("chunk") \
-...     .setRegexParsers(["(<NN>)+"])
->>> chunkerFilter = ChunkFiltererApproach() \
-...     .setInputCols(["sentence", "chunk"]) \
-...     .setOutputCol("filtered") \
-...     .setCriteria("isin") \
-...     .setWhiteList(["gastroenteritis"])
->>> pipeline = Pipeline(stages=[
-...     docAssembler,
-...     sentenceDetector,
-...     tokenizer,
-...     posTagger,
-...     chunker,
-...     chunkerFilter])
->>> result = pipeline.fit(data).transform(data)
->>> result.selectExpr("explode(chunk)").show(truncate=False)
-+---------------------------------------------------------------------------------+
-|col |
-+---------------------------------------------------------------------------------+
-|{chunk, 11, 17, history, {sentence -> 0, chunk -> 0}, []} |
-|{chunk, 22, 36, gastroenteritis, {sentence -> 0, chunk -> 1}, []} |
-|{chunk, 42, 53, stomach pain, {sentence -> 0, chunk -> 2}, []} |
-|{chunk, 64, 70, patient, {sentence -> 0, chunk -> 3}, []} |
-|{chunk, 81, 110, stomach pain now.We don't care, {sentence -> 0, chunk -> 4}, []}|
-|{chunk, 118, 132, gastroenteritis, {sentence -> 0, chunk -> 5}, []} |
-+---------------------------------------------------------------------------------+
-
-
->>> result.selectExpr("explode(filtered)").show(truncate=False)
-+-------------------------------------------------------------------+
-|col |
-+-------------------------------------------------------------------+
-|{chunk, 22, 36, gastroenteritis, {sentence -> 0, chunk -> 1}, []} |
-|{chunk, 118, 132, gastroenteritis, {sentence -> 0, chunk -> 5}, []}|
-+-------------------------------------------------------------------+
-
-
-Methods
-
-__init__([classname])
-clear(param): Clears a param from the param map if it has been explicitly set.
-copy([extra]): Creates a copy of this instance with the same uid and some extra params.
-explainParam(param): Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-explainParams(): Returns the documentation of all params with their optional default values and user-supplied values.
-extractParamMap([extra]): Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-fit(dataset[, params]): Fits a model to the input dataset with optional parameters.
-fitMultiple(dataset, paramMaps): Fits a model to the input dataset for each param map in paramMaps.
-getInputCols(): Gets current column names of input annotations.
-getLazyAnnotator(): Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-getOrDefault(param): Gets the value of a param in the user-supplied param map or its default value.
-getOutputCol(): Gets output column name of annotations.
-getParam(paramName): Gets a param by its name.
-getParamValue(paramName): Gets the value of a parameter.
-hasDefault(param): Checks whether a param has a default value.
-hasParam(paramName): Tests whether this instance contains a param with a given (string) name.
-isDefined(param): Checks whether a param is explicitly set by user or has a default value.
-isSet(param): Checks whether a param is explicitly set by user.
-load(path): Reads an ML instance from the input path, a shortcut of read().load(path).
-read(): Returns an MLReader instance for this class.
-save(path): Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-set(param, value): Sets a parameter in the embedded param map.
-setCriteria(s): Sets the tag representing the criteria used to filter the chunks.
-setEntitiesConfidenceResource(path[, ...])
-setFilterEntity(s): Sets the tag representing the criteria used to filter the chunks.
-setInputCols(*value): Sets column names of input annotations.
-setLazyAnnotator(value): Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-setOutputCol(value): Sets output column name of annotations.
-setParamValue(paramName): Sets the value of a parameter.
-setRegex(value): Sets list of regex to process.
-setWhiteList(value): Sets list of entities to process.
-write(): Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-criteria
-entitiesConfidenceResource
-filterValue
-getter_attrs
-inputCols
-lazyAnnotator
-name
-outputCol
-params: Returns all params ordered by name.
-regex
-whiteList
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-fit ( dataset , params = None )
-Fits a model to the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-params dict or list or tuple, optional an optional param map that overrides embedded params. If a list/tuple of
-param maps is given, this calls fit on each param map and returns a list of
-models.
-
-
-
-Returns
-
-Transformer
or a list of Transformer
fitted model(s)
-
-
-
-
-
-
-
-
-fitMultiple ( dataset , paramMaps )
-Fits a model to the input dataset for each param map in paramMaps .
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-paramMaps collections.abc.Sequence
A Sequence of param maps.
-
-
-
-Returns
-
-_FitMultipleIterator
A thread safe iterable which contains one model for each param map. Each
-call to next(modelIterator) will return (index, model) where model was fit
-using paramMaps[index] . index values may not be sequential.
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setCriteria ( s ) [source]
-Set tag representing what is the criteria to filter the chunks. possibles values (isIn|regex)
-
-Parameters
-
-s str List of dash-separated pairs of named entities
-
-
-
-
-
-
-
-
-setFilterEntity ( s ) [source]
-Set tag representing what is the criteria to filter the chunks. possibles values (assertion|isIn|regex)
-
-Parameters
-
-s str possibles values result|entity.
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setRegex ( value ) [source]
-Sets the list of regex patterns to process. The rest will be ignored.
-
-Parameters
-
-value list List of regex patterns to process; the rest will be ignored.
-
-
-
-
-
-
-
-
-setWhiteList ( value ) [source]
-Sets list of entities to process. The rest will be ignored.
-
-Parameters
-
-value list If defined, list of entities to process. The rest will be ignored.
-
-
-
-
-
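-A hedged sketch tying the filtering setters above together (`chunkFilterer` stands for this page's annotator; the entity names are illustrative):
->>> chunkFilterer = chunkFilterer \
-...     .setCriteria("isIn") \
-...     .setWhiteList(["Diagnosis", "Age"])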
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkKeyPhraseExtraction.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkKeyPhraseExtraction.html
deleted file mode 100644
index 334f3cfd72..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkKeyPhraseExtraction.html
+++ /dev/null
@@ -1,1465 +0,0 @@
sparknlp_jsl.annotator.ChunkKeyPhraseExtraction — Spark NLP 3.3.0 documentation
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkMergeApproach.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkMergeApproach.html
deleted file mode 100644
index 0fe07033bf..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkMergeApproach.html
+++ /dev/null
@@ -1,1199 +0,0 @@
sparknlp_jsl.annotator.ChunkMergeApproach — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.ChunkMergeApproach
-
-
-class sparknlp_jsl.annotator. ChunkMergeApproach [source]
-Bases: sparknlp.common.AnnotatorApproach
-Merges two chunk columns coming from two annotators (NER, ContextualParser, or any other annotator producing
-chunks). The merger of the two chunk columns is made by selecting one chunk from one of the columns according
-to certain criteria.
-The decision on which chunk to select is made according to the chunk indices in the source document
-(chunks with longer lengths and the highest information will be kept from each source).
-Labels can be changed by setReplaceDictResource.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-CHUNK,CHUNK
-CHUNK
-
-
-
-
-Parameters
-
-mergeOverlapping whether to merge overlapping matched chunks. Defaults to false
-
-falsePositivesResource file with false positive pairs
-
-replaceDictResource replace dictionary pairs
-
-chunkPrecedence Selects the precedence when two chunks have the same start and end indices. Possible values are [entity|identifier|field]
-
-blackList If defined, list of entities to ignore. The rest will be processed.
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
-Define a pipeline with 2 different NER models with a ChunkMergeApproach at the end
->>> data = spark . createDataFrame ([[ "A 63-year-old man presents to the hospital ..." ]]) . toDF ( "text" )
->>> pipeline = Pipeline ( stages = [
-... DocumentAssembler () . setInputCol ( "text" ) . setOutputCol ( "document" ),
-... SentenceDetector () . setInputCols ([ "document" ]) . setOutputCol ( "sentence" ),
-... Tokenizer () . setInputCols ([ "sentence" ]) . setOutputCol ( "token" ),
-... WordEmbeddingsModel . pretrained ( "embeddings_clinical" , "en" , "clinical/models" ) . setOutputCol ( "embs" ),
-... MedicalNerModel . pretrained ( "ner_jsl" , "en" , "clinical/models" ) \
-... . setInputCols ([ "sentence" , "token" , "embs" ]) . setOutputCol ( "jsl_ner" ),
-... NerConverter () . setInputCols ([ "sentence" , "token" , "jsl_ner" ]) . setOutputCol ( "jsl_ner_chunk" ),
-... MedicalNerModel . pretrained ( "ner_bionlp" , "en" , "clinical/models" ) \
-... . setInputCols ([ "sentence" , "token" , "embs" ]) . setOutputCol ( "bionlp_ner" ),
-... NerConverter () . setInputCols ([ "sentence" , "token" , "bionlp_ner" ]) \
-... . setOutputCol ( "bionlp_ner_chunk" ),
-... ChunkMergeApproach () . setInputCols ([ "jsl_ner_chunk" , "bionlp_ner_chunk" ]) . setOutputCol ( "merged_chunk" )
-... ])
->>> result = pipeline . fit ( data ) . transform ( data ) . cache ()
->>> result . selectExpr ( "explode(merged_chunk) as a" ) \
-... . selectExpr ( "a.begin" , "a.end" , "a.result as chunk" , "a.metadata.entity as entity" ) \
-... . show ( 5 , False )
-+-----+---+-----------+---------+
-|begin|end|chunk |entity |
-+-----+---+-----------+---------+
-|5 |15 |63-year-old|Age |
-|17 |19 |man |Gender |
-|64 |72 |recurrent |Modifier |
-|98 |107|cellulitis |Diagnosis|
-|110 |119|pneumonias |Diagnosis|
-+-----+---+-----------+---------+
-
-
-Methods
-
-
-
-
-
-
-__init__
()
-
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-fit
(dataset[, params])
-Fits a model to the input dataset with optional parameters.
-
-fitMultiple
(dataset, paramMaps)
-Fits a model to the input dataset for each param map in paramMaps .
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setBlackList
(entities)
-If defined, list of entities to ignore.
-
-setChunkPrecedence
(b)
-Sets the precedence when two chunks have the same start and end indices.
-
-setFalsePositivesResource
(path[, read_as, ...])
-Sets file with false positive pairs
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMergeOverlapping
(b)
-Sets whether to merge overlapping matched chunks.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setReplaceDictResource
(path[, read_as, options])
-Sets replace dictionary pairs
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-blackList
-
-
-chunkPrecedence
-
-
-falsePositivesResource
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-mergeOverlapping
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-replaceDictResource
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-fit ( dataset , params = None )
-Fits a model to the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-params dict or list or tuple, optional an optional param map that overrides embedded params. If a list/tuple of
-param maps is given, this calls fit on each param map and returns a list of
-models.
-
-
-
-Returns
-
-Transformer or a list of Transformer
-fitted model(s)
-
-
-
-
-
-
-
-
-fitMultiple ( dataset , paramMaps )
-Fits a model to the input dataset for each param map in paramMaps .
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-paramMaps collections.abc.Sequence
A Sequence of param maps.
-
-
-
-Returns
-
-_FitMultipleIterator
A thread safe iterable which contains one model for each param map. Each
-call to next(modelIterator) will return (index, model) where model was fit
-using paramMaps[index] . index values may not be sequential.
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBlackList ( entities ) [source]
-If defined, list of entities to ignore. The rest will be processed.
-
-Parameters
-
-entities list If defined, list of entities to ignore. The rest will be processed.
-
-
-
-
-
-
-
-
-setChunkPrecedence ( b ) [source]
-Sets the precedence when two chunks have the same start and end indices. Possible values are [entity|identifier|field]
-
-Parameters
-
-b str The precedence when two chunks have the same start and end indices. Possible values are [entity|identifier|field]
-
-
-
-
-
-
-
-
-setFalsePositivesResource ( path , read_as = 'TEXT' , options = {'delimiter': ','} ) [source]
-Sets the file with false positive pairs
-
-Parameters
-
-path str Path to the external resource
-
-read_as str, optional How to read the resource, by default ReadAs.TEXT
-
-options dict, optional Options for reading the resource, by default {"delimiter": ","}
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMergeOverlapping ( b ) [source]
-Sets whether to merge overlapping matched chunks. Defaults to false.
-
-Parameters
-
-b bool Whether to merge overlapping matched chunks. Defaults to false.
-
-
-
-
-
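-A hedged configuration sketch combining the merge-related setters documented above (the values are illustrative, not recommendations):
->>> merger = ChunkMergeApproach() \
-...     .setInputCols(["jsl_ner_chunk", "bionlp_ner_chunk"]) \
-...     .setOutputCol("merged_chunk") \
-...     .setMergeOverlapping(True) \
-...     .setChunkPrecedence("entity") \
-...     .setBlackList(["Gender"])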
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setReplaceDictResource ( path , read_as = 'TEXT' , options = {'delimiter': ','} ) [source]
-Sets the file with replace dictionary pairs
-
-Parameters
-
-path str Path to the external resource
-
-read_as str, optional How to read the resource, by default ReadAs.TEXT
-
-options dict, optional Options for reading the resource, by default {"delimiter": ","}
-
-
-
-
-
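-A hedged illustration of the resource format: with the default options the file is read as comma-delimited pairs (the exact column semantics are an assumption here, with the first column taken as the label to replace and the second as its replacement):
->>> # replace_dict.csv (assumed layout):
->>> #   Diagnosis,Dx
->>> #   Modifier,Mod
->>> merger = merger.setReplaceDictResource("replace_dict.csv", read_as="TEXT", options={"delimiter": ","})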
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkMergeModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkMergeModel.html
deleted file mode 100644
index 0603baf34a..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkMergeModel.html
+++ /dev/null
@@ -1,1064 +0,0 @@
sparknlp_jsl.annotator.ChunkMergeModel — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.ChunkMergeModel
-
-
-class sparknlp_jsl.annotator. ChunkMergeModel ( classname = 'com.johnsnowlabs.nlp.annotators.merge.ChunkMergeModel' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
-The model produced by ChunkMergeApproach.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-CHUNK,CHUNK
-CHUNK
-
-
-
-
-Parameters
-
-mergeOverlapping whether to merge overlapping matched chunks. Defaults to false
-
-chunkPrecedence Selects the precedence when two chunks have the same start and end indices. Possible values are [entity|identifier|field]
-
-blackList If defined, list of entities to ignore. The rest will be processed.
-
-
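-Since this model is produced by fitting a ChunkMergeApproach, a minimal hedged usage sketch looks like this (assuming a DataFrame `chunked` that already contains both input chunk columns):
->>> mergeModel = ChunkMergeApproach() \
-...     .setInputCols(["jsl_ner_chunk", "bionlp_ner_chunk"]) \
-...     .setOutputCol("merged_chunk") \
-...     .fit(chunked)
->>> merged = mergeModel.transform(chunked)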
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-pretrained
(name[, lang, remote_loc])
-
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setChunkPrecedence
(b)
-Sets the precedence when two chunks have the same start and end indices.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMergeOverlapping
(b)
-Sets whether to merge overlapping matched chunks.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-blackList
-
-
-chunkPrecedence
-
-
-falsePositives
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-mergeOverlapping
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-replaceDict
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setChunkPrecedence ( b ) [source]
-Sets the precedence when two chunks have the same start and end indices. Possible values are [entity|identifier|field]
-
-Parameters
-
-b str The precedence when two chunks have the same start and end indices. Possible values are [entity|identifier|field]
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMergeOverlapping ( b ) [source]
-Sets whether to merge overlapping matched chunks. Defaults to false.
-
-Parameters
-
-b bool Whether to merge overlapping matched chunks. Defaults to false.
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkSentenceSplitter.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkSentenceSplitter.html
deleted file mode 100644
index cf84b70c37..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ChunkSentenceSplitter.html
+++ /dev/null
@@ -1,1086 +0,0 @@
sparknlp_jsl.annotator.ChunkSentenceSplitter — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.ChunkSentenceSplitter
-
-
-class sparknlp_jsl.annotator. ChunkSentenceSplitter ( classname = 'com.johnsnowlabs.nlp.annotators.chunker.ChunkSentenceSplitter' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
-Splits the document using the chunks that you provide, and puts the chunk entity in the metadata.
-The first piece of the document, up to the first chunk, will have the entity as a header.
-Use the identifier or the field as an entity.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, CHUNK
-DOCUMENT
-
-
-
-
-Parameters
-
-inputType The type of the annotation that you want to filter, by default sentence_embeddings. Possible values:
-document|token|wordpiece|word_embeddings|sentence_embeddings|category|date|sentiment|pos|chunk|named_entity|regex|dependency|labeled_dependency|language|keyword
-
-Examples
->>> documentAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
->>> regexMatcher = RegexMatcher().setExternalRules("../src/test/resources/chunker/title_regex.txt", ",") \
-...     .setInputCols("document") \
-...     .setOutputCol("chunks")
->>> chunkSentenceSplitter = ChunkSentenceSplitter().setInputCols("chunks", "document").setOutputCol("paragraphs")
->>> pipeline = Pipeline().setStages([documentAssembler, regexMatcher, chunkSentenceSplitter])
->>> result = pipeline.fit(data).transform(data).select("paragraphs")
->>> result.show()
-
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setDefaultEntity
(value)
-Sets the key in the metadata dictionary that you want to filter (by default 'entity')
-
-setGroupBySentences
(value)
-Sets the groupBySentences that allow split the paragraphs grouping the chunks by sentences.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setInsertChunk
(value)
-Whether to insert the chunk in the paragraph or not.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-defaultEntity
-
-
-getter_attrs
-
-
-groupBySentences
-
-
-inputCols
-
-
-insertChunk
-
-
-lazyAnnotator
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setDefaultEntity ( value ) [source]
-Sets the key in the metadata dictionary that you want to filter (by default ‘entity’)
-
-Parameters
-
-value str The key in the metadata dictionary that you want to filter (by default ‘entity’)
-
-
-
-
-
-
-
-
-setGroupBySentences ( value ) [source]
-
-Sets groupBySentences, which allows splitting the paragraphs by grouping the chunks by sentences. If it is false, we assume
-that there is one document annotation and that all chunks belong to this document.
-Use true when a sentence detector produced the input column or when the document has many sentences per row;
-use false when the chunks were produced against a single document annotation.
-
-
-Parameters
-
-value Boolean Whether to split the paragraphs by grouping the chunks by sentences
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setInsertChunk ( value ) [source]
-Sets whether to insert the chunk in the paragraph or not.
-
-Parameters
-
-value Boolean Whether to insert the chunk in the paragraph or not.
-
-
-
-
-
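-A hedged sketch combining the two flags documented in this section (column names follow the example above):
->>> chunkSentenceSplitter = ChunkSentenceSplitter() \
-...     .setInputCols("chunks", "document") \
-...     .setOutputCol("paragraphs") \
-...     .setGroupBySentences(False) \
-...     .setInsertChunk(True)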
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.CommonResolverParams.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.CommonResolverParams.html
deleted file mode 100644
index d69609af75..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.CommonResolverParams.html
+++ /dev/null
@@ -1,1016 +0,0 @@
sparknlp_jsl.annotator.CommonResolverParams — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.CommonResolverParams
-
-
-class sparknlp_jsl.annotator. CommonResolverParams [source]
-Bases: sparknlp.common.HasCaseSensitiveProperties
-Class used to provide a common interface for the Entity Resolver family.
-
-Parameters
-
-distanceFunction What distance function to use for WMD: ‘EUCLIDEAN’ or ‘COSINE’.
-
-neighbours Number of neighbours to consider in the KNN query to calculate WMD
-
-alternatives Number of results to return in the metadata after sorting by last distance calculated
-
-extramassPenalty Penalty for extra words in the knowledge base match
-
-threshold Threshold value for the last distance calculated.
-
-enableWmd Whether or not to use WMD token distance.
-
-enableTfidf Whether or not to use TFIDF token distance.
-
-enableJaccard Whether or not to use Jaccard token distance.
-
-enableSorensenDice Whether or not to use Sorensen-Dice token distance.
-
-enableJaroWinkler Whether or not to use Jaro-Winkler character distance.
-
-enableLevenshtein Whether or not to use Levenshtein character distance.
-
-distanceWeights Distance weights to apply before pooling: [WMD, TFIDF, Jaccard, SorensenDice, JaroWinkler, Levenshtein].
-
-poolingStrategy Pooling strategy to aggregate distances: AVERAGE, MIN or MAX.
-
-confidenceFunction What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-allDistancesMetadata Whether or not to return all distance values in the metadata. Default: False.
-
-missAsEmpty Whether or not to return an empty annotation on unmatched chunks.
-
-
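-Because this class is a mixin, the params above are set on a concrete resolver annotator; a hedged sketch (`resolver` stands for any member of the Entity Resolver family, and the values are illustrative):
->>> resolver = resolver \
-...     .setDistanceFunction("COSINE") \
-...     .setNeighbours(500) \
-...     .setAlternatives(5) \
-...     .setPoolingStrategy("AVERAGE") \
-...     .setConfidenceFunction("SOFTMAX") \
-...     .setMissAsEmpty(True)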
-
-
-Methods
-
-
-
-
-
-
-__init__
(*args, **kwargs)
-
-
-getCaseSensitive
()
-Gets whether to ignore case in tokens for embeddings matching.
-
-setAllDistancesMetadata
(s)
-Sets whether or not to return an all distance values in the metadata.
-
-setAlternatives
(a)
-Sets number of results to return in the metadata after sorting by last distance calculated.
-
-setCaseSensitive
(value)
-Sets whether to ignore case in tokens for embeddings matching.
-
-setConfidenceFunction
(s)
-What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-setDistanceFunction
(dist)
-Sets distance function to use for WMD: 'EUCLIDEAN' or 'COSINE'.
-
-setDistanceWeights
(l)
-Sets distance weights to apply before pooling: [WMD, TFIDF, Jaccard, SorensenDice, JaroWinkler, Levenshtein].
-
-setEnableJaccard
(e)
-Sets whether or not to use Jaccard token distance.
-
-setEnableJaroWinkler
(e)
-Whether or not to use Jaro-Winkler token distance.
-
-setEnableLevenshtein
(e)
-Sets whether or not to use Levenshtein token distance.
-
-setEnableSorensenDice
(e)
-Sets whether or not to use Sorensen-Dice token distance.
-
-setEnableTfidf
(e)
-Sets whether or not to use TFIDF token distance.
-
-setEnableWmd
(e)
-Sets whether or not to use WMD token distance.
-
-setExtramassPenalty
(emp)
-Sets penalty for extra words in the knowledge base match.
-
-setMissAsEmpty
(value)
-Sets whether or not to return an empty annotation on unmatched chunks
-
-setNeighbours
(k)
-Sets number of neighbours to consider in the KNN query to calculate WMD.
-
-setPoolingStrategy
(s)
-Sets pooling strategy to aggregate distances: AVERAGE, MIN or MAX.
-
-setThreshold
(thres)
-Sets Threshold value for the last distance calculated.
-
-
-
-Attributes
-
-
-
-
-
-
-allDistancesMetadata
-
-
-alternatives
-
-
-caseSensitive
-
-
-confidenceFunction
-
-
-distanceFunction
-
-
-distanceWeights
-
-
-enableJaccard
-
-
-enableJaroWinkler
-
-
-enableLevenshtein
-
-
-enableSorensenDice
-
-
-enableTfidf
-
-
-enableWmd
-
-
-extramassPenalty
-
-
-missAsEmpty
-
-
-neighbours
-
-
-poolingStrategy
-
-
-threshold
-
-
-
-
-
-
-getCaseSensitive ( )
-Gets whether to ignore case in tokens for embeddings matching.
-
-Returns
-
-bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-setAllDistancesMetadata ( s ) [source]
-Sets whether or not to return all distance values in the metadata. Default: False.
-
-Parameters
-
-s bool Whether or not to return all distance values in the metadata. Default: False.
-
-
-
-
-
-
-
-
-setAlternatives ( a ) [source]
-Sets number of results to return in the metadata after sorting by last distance calculated.
-
-Parameters
-
-a int Number of results to return in the metadata after sorting by last distance calculated.
-
-
-
-
-
-
-
-
-setCaseSensitive ( value )
-Sets whether to ignore case in tokens for embeddings matching.
-
-Parameters
-
-value bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-setConfidenceFunction ( s ) [source]
-What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-Parameters
-
-s str What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-
-
-
-
-
-
-
-setDistanceFunction ( dist ) [source]
-Sets distance function to use for WMD: ‘EUCLIDEAN’ or ‘COSINE’.
-
-Parameters
-
-dist str Value that selects what distance function to use for WMD: ‘EUCLIDEAN’ or ‘COSINE’.
-
-
-
-
-
-
-
-
-setDistanceWeights ( l ) [source]
-Sets distance weights to apply before pooling: [WMD, TFIDF, Jaccard, SorensenDice, JaroWinkler, Levenshtein].
-
-Parameters
-
-l list Distance weights to apply before pooling, in the order [WMD, TFIDF, Jaccard, SorensenDice, JaroWinkler, Levenshtein].
-
-
-
-
-
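-For example, to keep only the two character-based distances, zero out the token-based weights in the order listed above (a hedged illustration):
->>> resolver = resolver.setDistanceWeights([0.0, 0.0, 0.0, 0.0, 1.0, 1.0])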
-
-
-
-setEnableJaccard ( e ) [source]
-Sets whether or not to use Jaccard token distance.
-
-Parameters
-
-e bool
-Whether or not to use Jaccard token distance.
-
-
-
-
-
-
-
-setEnableJaroWinkler ( e ) [source]
-Sets whether or not to use Jaro-Winkler token distance.
-
-Parameters
-
-e bool Whether or not to use Jaro-Winkler token distance.
-
-
-
-
-
-
-
-
-setEnableLevenshtein ( e ) [source]
-Sets whether or not to use Levenshtein token distance.
-
-Parameters
-
-e bool Whether or not to use Levenshtein token distance.
-
-
-
-
-
-
-
-
-setEnableSorensenDice ( e ) [source]
-Sets whether or not to use Sorensen-Dice token distance.
-
-Parameters
-
-e bool Whether or not to use Sorensen-Dice token distance.
-
-
-
-
-
-
-
-
-setEnableTfidf ( e ) [source]
-Sets whether or not to use TFIDF token distance.
-
-Parameters
-
-e bool Whether or not to use TFIDF token distance.
-
-
-
-
-
-
-
-
-setEnableWmd ( e ) [source]
-Sets whether or not to use WMD token distance.
-
-Parameters
-
-e bool Whether or not to use WMD token distance.
-
-
-
-
-
-
-
-
-setExtramassPenalty ( emp ) [source]
-Sets penalty for extra words in the knowledge base match.
-
-Parameters
-
-emp float Penalty for extra words in the knowledge base match.
-
-
-
-
-
-
-
-
-setMissAsEmpty ( value ) [source]
-Sets whether or not to return an empty annotation on unmatched chunks
-
-Parameters
-
-value bool Whether or not to return an empty annotation on unmatched chunks
-
-
-
-
-
-
-
-
-setNeighbours ( k ) [source]
-Sets number of neighbours to consider in the KNN query to calculate WMD.
-
-Parameters
-
-k int Number of neighbours to consider in the KNN query to calculate WMD.
-
-
-
-
-
-
-
-
-setPoolingStrategy ( s ) [source]
-Sets pooling strategy to aggregate distances: AVERAGE, MIN or MAX.
-
-Parameters
-
-s str Pooling strategy to aggregate distances: AVERAGE, MIN or MAX.
-
-
-
-
-
-
-
-
-setThreshold ( thres ) [source]
-Sets Threshold value for the last distance calculated.
-
-Parameters
-
-thres float Threshold value for the last distance calculated.
-
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ContextualParserApproach.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ContextualParserApproach.html
deleted file mode 100644
index c29840aa6f..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ContextualParserApproach.html
+++ /dev/null
@@ -1,1179 +0,0 @@
sparknlp_jsl.annotator.ContextualParserApproach — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.ContextualParserApproach
-
-
-class sparknlp_jsl.annotator. ContextualParserApproach [source]
-Bases: sparknlp.common.AnnotatorApproach
-Creates a model that extracts entities from a document based on user-defined rules.
-Rule matching is based on a RegexMatcher defined in a JSON file, which is set through the parameter setJsonPath().
-In this JSON file, you define the regex that you want to match along with the information that will be output in the metadata
-field. Additionally, a dictionary can be provided with setDictionary to map extracted entities
-to a unified representation. The first column of the dictionary file should be the representation, with the following
-columns containing the possible matches.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, TOKEN
-CHUNK
-
-
-
-
-Parameters
-
-jsonPath Path to json file with rules
-
-caseSensitive Whether to use case-sensitive matching of values
-
-prefixAndSuffixMatch Whether to match both prefix and suffix to annotate the hit
-
-dictionary Path to dictionary file in tsv or csv format
-
-
-
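-For illustration only, such a rules file could look roughly like the sketch below; the key names are assumptions inferred from the metadata fields shown in the example output (field, sentence-level matching) and should be checked against the library before use:
->>> rules = '''
-... {
-...   "entity": "Stage",
-...   "ruleScope": "sentence",
-...   "regex": "[cpyrau]{0,3}[TNM][0-9a-zX?]+",
-...   "matchScope": "token"
-... }
-... '''
->>> with open("/path/to/regex_token.json", "w") as f:
-...     _ = f.write(rules)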
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
-
-
->>> documentAssembler = DocumentAssembler() \
-...     .setInputCol("text") \
-...     .setOutputCol("document")
->>> sentenceDetector = SentenceDetector() \
-...     .setInputCols(["document"]) \
-...     .setOutputCol("sentence")
->>> tokenizer = Tokenizer() \
-...     .setInputCols(["sentence"]) \
-...     .setOutputCol("token")
-
-
-Define the parser (json file needs to be provided)
->>> data = spark.createDataFrame([["A patient has liver metastases pT1bN0M0 and the T5 primary site may be colon or... "]]).toDF("text")
->>> contextualParser = ContextualParserApproach() \
-...     .setInputCols(["sentence", "token"]) \
-...     .setOutputCol("entity") \
-...     .setJsonPath("/path/to/regex_token.json") \
-...     .setCaseSensitive(True)
->>> pipeline = Pipeline(stages=[
-...     documentAssembler,
-...     sentenceDetector,
-...     tokenizer,
-...     contextualParser
-... ])
->>> result = pipeline.fit(data).transform(data)
->>> result.selectExpr("explode(entity)").show(5, truncate=False)
-
-
-
-
-
-
-
-col
-
-{chunk, 32, 39, pT1bN0M0, {field -> Stage, normalized -> , confidenceValue -> 1.00, sentence -> 0}, []}
-{chunk, 49, 50, T5, {field -> Stage, normalized -> , confidenceValue -> 1.00, sentence -> 0}, []}
-{chunk, 148, 156, cT4bcN2M1, {field -> Stage, normalized -> , confidenceValue -> 1.00, sentence -> 1}, []}
-{chunk, 189, 194, T?N3M1, {field -> Stage, normalized -> , confidenceValue -> 1.00, sentence -> 2}, []}
-{chunk, 316, 323, pT1bN0M0, {field -> Stage, normalized -> , confidenceValue -> 1.00, sentence -> 3}, []}
-
-
-
-Methods
-
-
-
-
-
-
-__init__
()
-
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-fit
(dataset[, params])
-Fits a model to the input dataset with optional parameters.
-
-fitMultiple
(dataset, paramMaps)
-Fits a model to the input dataset for each param map in paramMaps .
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setCaseSensitive
(value)
-Sets whether to use case-sensitive matching of values
-
-setDictionary
(path[, read_as, options])
-Sets the path to the dictionary file used to map extracted entities to a unified representation
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setJsonPath
(value)
-Sets path to json file with rules
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setPrefixAndSuffixMatch
(value)
-Sets whether to match both prefix and suffix to annotate the hit
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-caseSensitive
-
-
-dictionary
-
-
-getter_attrs
-
-
-inputCols
-
-
-jsonPath
-
-
-lazyAnnotator
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-prefixAndSuffixMatch
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-fit ( dataset , params = None )
-Fits a model to the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-params dict or list or tuple, optional an optional param map that overrides embedded params. If a list/tuple of
-param maps is given, this calls fit on each param map and returns a list of
-models.
-
-
-
-Returns
-
-Transformer or a list of Transformer
-fitted model(s)
-
-
-
-
-
-
-
-
-fitMultiple ( dataset , paramMaps )
-Fits a model to the input dataset for each param map in paramMaps .
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-paramMaps collections.abc.Sequence
A Sequence of param maps.
-
-
-
-Returns
-
-_FitMultipleIterator
A thread safe iterable which contains one model for each param map. Each
-call to next(modelIterator) will return (index, model) where model was fit
-using paramMaps[index] . index values may not be sequential.
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setCaseSensitive ( value ) [source]
-Sets whether to use case sensitive when matching values
-
-Parameters
-
-value bool Whether to use case sensitive when matching values
-
-
-
-
-
-
-
-
-setDictionary ( path , read_as = 'TEXT' , options = {'delimiter': '\t'} ) [source]
-Sets the path to the dictionary file (in TSV or CSV format) used to map extracted entities to a unified representation
-
-Parameters
-
-path str Path to the dictionary file
-
-read_as: ReadAs Format of the file
-
-options: dict Dictionary with the options to read the file.
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setJsonPath ( value ) [source]
-Sets path to json file with rules
-
-Parameters
-
-value str Path to json file with rules
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setPrefixAndSuffixMatch ( value ) [source]
-Sets whether to match both prefix and suffix to annotate the hit
-
-Parameters
-
-value bool Whether to match both prefix and suffix to annotate the hit
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ContextualParserModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ContextualParserModel.html
deleted file mode 100644
index ad40825adc..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ContextualParserModel.html
+++ /dev/null
@@ -1,1122 +0,0 @@
sparknlp_jsl.annotator.ContextualParserModel — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.ContextualParserModel
-
-
-class sparknlp_jsl.annotator. ContextualParserModel ( classname = 'com.johnsnowlabs.nlp.annotators.context.ContextualParserModel' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
-Extracts entities from a document based on user-defined rules. Rule matching is based on a RegexMatcher defined in a
-JSON file. In this file, you define the regex that you want to match along with the information that will be output in the
-metadata field.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, TOKEN
-CHUNK
-
-
-
-
-Parameters
-
-jsonPath Path to json file with rules
-
-caseSensitive Whether to use case-sensitive matching of values
-
-prefixAndSuffixMatch Whether to match both prefix and suffix to annotate the hit
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
-
-
-An example pipeline, which extracts the stage code on a sentence level, could then be defined like this:
->>> documentAssembler = DocumentAssembler() \
-...     .setInputCol("text") \
-...     .setOutputCol("document")
->>> sentenceDetector = SentenceDetector() \
-...     .setInputCols(["document"]) \
-...     .setOutputCol("sentence")
->>> tokenizer = Tokenizer() \
-...     .setInputCols(["sentence"]) \
-...     .setOutputCol("token")
-
-
->>> data = spark.createDataFrame([["A patient has liver metastases pT1bN0M0 and the T5 primary site may be colon or... "]]).toDF("text")
->>> contextualParser = ContextualParserModel.load("mycontextualParserModel") \
-...     .setInputCols(["sentence", "token"]) \
-...     .setOutputCol("entity")
->>> pipeline = Pipeline(stages=[
-...     documentAssembler,
-...     sentenceDetector,
-...     tokenizer,
-...     contextualParser
-... ])
->>> result = pipeline.fit(data).transform(data)
->>> result.selectExpr("explode(entity)").show(5, truncate=False)
-
-
-
-
-
-
-
-col
-
-{chunk, 32, 39, pT1bN0M0, {field -> Stage, normalized -> , confidenceValue -> 1.00, sentence -> 0}, []}
-{chunk, 49, 50, T5, {field -> Stage, normalized -> , confidenceValue -> 1.00, sentence -> 0}, []}
-{chunk, 148, 156, cT4bcN2M1, {field -> Stage, normalized -> , confidenceValue -> 1.00, sentence -> 1}, []}
-{chunk, 189, 194, T?N3M1, {field -> Stage, normalized -> , confidenceValue -> 1.00, sentence -> 2}, []}
-{chunk, 316, 323, pT1bN0M0, {field -> Stage, normalized -> , confidenceValue -> 1.00, sentence -> 3}, []}
-
-
-
-Methods
-
-
-
-
-
-
-__init__([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols()
-Gets current column names of input annotations.
-
-getLazyAnnotator()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol()
-Gets output column name of annotations.
-
-getParam(paramName)
-Gets a param by its name.
-
-getParamValue(paramName)
-Gets the value of a parameter.
-
-hasDefault(param)
-Checks whether a param has a default value.
-
-hasParam(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet(param)
-Checks whether a param is explicitly set by user.
-
-load(path)
-Reads an ML instance from the input path, a shortcut of read().load(path).
-
-read()
-Returns an MLReader instance for this class.
-
-save(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set(param, value)
-Sets a parameter in the embedded param map.
-
-setCaseSensitive(value)
-Sets whether matching of values is case sensitive.
-
-setInputCols(*value)
-Sets column names of input annotations.
-
-setJsonPath(value)
-Sets the path to the JSON file with rules.
-
-setLazyAnnotator(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol(value)
-Sets output column name of annotations.
-
-setParamValue(paramName)
-Sets the value of a parameter.
-
-setParams()
-
-
-setPrefixAndSuffixMatch(value)
-Sets whether to match both prefix and suffix to annotate the hit.
-
-transform(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-caseSensitive
-
-
-getter_attrs
-
-
-inputCols
-
-
-jsonPath
-
-
-lazyAnnotator
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-prefixAndSuffixMatch
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams: Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setCaseSensitive ( value ) [source]
-Sets whether matching of values is case sensitive.
-
-Parameters
-
-value bool Whether matching of values is case sensitive
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setJsonPath ( value ) [source]
-Sets path to json file with rules
-
-Parameters
-
-value str Path to json file with rules
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setPrefixAndSuffixMatch ( value ) [source]
-Sets whether to match both prefix and suffix to annotate the hit
-
-Parameters
-
-value bool Whether to match both prefix and suffix to annotate the hit
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame: input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame: transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DateNormalizer.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DateNormalizer.html
deleted file mode 100644
index 28a0cbbd1c..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DateNormalizer.html
+++ /dev/null
@@ -1,1115 +0,0 @@
-sparknlp_jsl.annotator.DateNormalizer — Spark NLP 3.3.0 documentation
-sparknlp_jsl.annotator.DateNormalizer
-
-
-class sparknlp_jsl.annotator.DateNormalizer(classname='com.johnsnowlabs.nlp.annotators.normalizer.DateNormalizer', java_model=None) [source]
-Bases: sparknlp.common.AnnotatorModel
-Tries to normalize dates in chunk annotations.
-The expected output format for the date is YYYY/MM/DD.
-If the date is normalized, the normalized field in the metadata will be true; otherwise it will be false.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-CHUNK
-CHUNK
-
-
-
-
-Parameters
-
-anchorDateYear Adds an anchor year for relative dates such as "a day after tomorrow".
-If not set it will use the current year. Example: 2021
-
-anchorDateMonth Adds an anchor month for relative dates such as "a day after tomorrow".
-If not set it will use the current month. Example: 1, which means January
-
-anchorDateDay Adds an anchor day for relative dates such as "a day after
-tomorrow". If not set it will use the current day. Example: 11
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> document_assembler = DocumentAssembler().setInputCol("ner_chunk").setOutputCol("document")
->>> chunksDF = document_assembler.transform(df)
->>> aa = map_annotations_col(chunksDF.select("document"),
-...     lambda x: [Annotation("chunk", a.begin, a.end, a.result, a.metadata, a.embeddings) for a in x],
-...     "document", "chunk_date", "chunk")
->>> dateNormalizer = DateNormalizer().setInputCols("chunk_date").setOutputCol("date").setAnchorDateYear(2000).setAnchorDateMonth(3).setAnchorDateDay(15)
->>> result = dateNormalizer.transform(aa)
->>> data = spark.createDataFrame([["Fri, 21 Nov 1997"], ["next week at 7.30"], ["see you a day after"]]).toDF("text")
->>> result = pipeline.fit(data).transform(data)
->>> result.selectExpr("date.result", "text").show(truncate=False)
-+-------------+-----------+
-| result| text|
-+-------------+-----------+
-| [08/02/2018]| 08/02/2018|
-| [11/2018]| 11/2018|
-| [11/01/2018]| 11/01/2018|
-|[next monday]|next monday|
-| [today]| today|
-| [next week]| next week|
-+-------------+-----------+
-
-
-Methods
-
-
-
-
-
-
-__init__([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols()
-Gets current column names of input annotations.
-
-getLazyAnnotator()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol()
-Gets output column name of annotations.
-
-getParam(paramName)
-Gets a param by its name.
-
-getParamValue(paramName)
-Gets the value of a parameter.
-
-hasDefault(param)
-Checks whether a param has a default value.
-
-hasParam(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet(param)
-Checks whether a param is explicitly set by user.
-
-load(path)
-Reads an ML instance from the input path, a shortcut of read().load(path).
-
-read()
-Returns an MLReader instance for this class.
-
-save(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set(param, value)
-Sets a parameter in the embedded param map.
-
-setAnchorDateDay(value)
-Sets the anchor day for relative dates such as "a day after tomorrow".
-
-setAnchorDateMonth(value)
-Sets the anchor month for relative dates such as "a day after tomorrow".
-
-setAnchorDateYear(value)
-Sets the anchor year for relative dates such as "a day after tomorrow".
-
-setInputCols(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol(value)
-Sets output column name of annotations.
-
-setParamValue(paramName)
-Sets the value of a parameter.
-
-setParams()
-
-
-transform(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-anchorDateDay
-
-
-anchorDateMonth
-
-
-anchorDateYear
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams: Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setAnchorDateDay ( value ) [source]
-Sets the anchor day for relative dates such as "a day after
-tomorrow". If not set it will use the current day.
-Example: 11
-
-Parameters
-
-value int The anchor day for relative dates
-
-
-
-
-
-
-
-
-setAnchorDateMonth ( value ) [source]
-Sets an anchor month for the relative dates such as a day after
-tomorrow. If not set it will use the current month.
-Example: 1 which means January
-
-Parameters
-
-value int The anchor month for relative dates
-
-
-
-
-
-
-
-
-setAnchorDateYear ( value ) [source]
-Sets an anchor year for the relative dates such as a day after
-tomorrow. If not set it will use the current year.
-Example: 2021
-
-Parameters
-
-value int The anchor year for relative dates
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame: input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame: transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DeIdentification.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DeIdentification.html
deleted file mode 100644
index adcfe9f79a..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DeIdentification.html
+++ /dev/null
@@ -1,1653 +0,0 @@
-sparknlp_jsl.annotator.DeIdentification — Spark NLP 3.3.0 documentation
-sparknlp_jsl.annotator.DeIdentification
-
-
-class sparknlp_jsl.annotator.DeIdentification [source]
-Bases: sparknlp.common.AnnotatorApproach
-Contains all the methods for training a DeIdentificationModel.
-This module can obfuscate or mask entities that contain personal information. These can be set with a file of
-regex patterns with setRegexPatternsDictionary, where each line is a mapping of entity to regex.
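-
-As a rough illustration of that dictionary layout (the entity names and patterns below are made-up examples; the space delimiter matches the default options of setRegexPatternsDictionary):
->>> rules = [
-...     "DATE \\d{4}-\\d{2}-\\d{2}",
-...     "ID [0-9]{6,8}",
-... ]
->>> with open("dic_regex_patterns.txt", "w") as f:
-...     _ = f.write("\n".join(rules))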
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, CHUNK, TOKEN
-DOCUMENT
-
-
-
-
-Parameters
-
-regexPatternsDictionary Dictionary with regular expression patterns that match some protected entity
-
-mode Mode for the anonymizer ['mask'|'obfuscate']
-
-obfuscateDate When mode=='obfuscate', whether to obfuscate dates or not. This param helps in consistency to make dateFormats more visible. When setting to true, make sure the dateFormats param fits the needs (default: false)
-
-obfuscateRefFile File with the terms to be used for obfuscation
-
-refFileFormat Format of the reference file
-
-refSep Separator character in refFile
-
-dateTag Tag representing dates in the obfuscate reference file (default: DATE)
-
-days Number of days to obfuscate the dates by displacement. If not provided, a random integer between 1 and 60 will be used
-
-dateToYear True if we want the model to transform dates into years, False otherwise.
-
-minYear Minimum year to be used when transforming dates into years.
-
-dateFormats List of date formats to automatically displace if parsed
-
-consistentObfuscation Whether to replace very similar entities in a document with the same randomized term (default: true).
-The similarity is based on the Levenshtein distance between the words.
-
-sameEntityThreshold Similarity threshold [0.0-1.0] to consider two appearances of an entity as the same (default: 0.9).
-
-obfuscateRefSource The source used to obfuscate the entities. For date entities this method does not apply.
-The values are the following:
-file: Takes the entities from the obfuscatorRefFile
-faker: Takes the entities from the Faker module
-both: Takes the entities from the obfuscatorRefFile and the Faker module randomly.
-
-regexOverride If true, prioritize the regex entities; if false, prioritize the NER entities.
-
-seed The seed used to select the entities in obfuscate mode. With the seed you can replay an execution several times with the same output.
-
-ignoreRegex Select if you want to use the regex file loaded in the model. If true, the default regex file will not be used. The default value is false.
-
-isRandomDateDisplacement Use a random number of displacement days in date entities; that random number is based on [[DeIdentificationParams.seed]].
-If true, use a random number of displacement days in date entities; if false, use [[DeIdentificationParams.days]].
-The default value is false.
-
-mappingsColumn This is the mapping column that will return the Annotation chunks with the fake entities.
-
-returnEntityMappings With this property you select whether to return the mappings column
-
-blackList List of entities ignored for masking or obfuscation. The default values are: "SSN","PASSPORT","DLN","NPI","C_CARD","IBAN","DEA"
-
-maskingPolicy
-Select the masking policy: same_length_chars: Replace the obfuscated entity with a masking sequence composed of asterisks and surrounding square brackets, the total length of the masking sequence being the same as that of the original sequence.
-Example: Smith -> [***].
-If the entity is shorter than 3 chars (e.g. 'Jo'), asterisks without brackets will be returned.
-entity_labels: Replace the values with the corresponding entity labels.
-fixed_length_chars: Replace the obfuscated entity with a masking sequence composed of a fixed number of asterisks.
-
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> documentAssembler = DocumentAssembler() \
-...     .setInputCol("text") \
-...     .setOutputCol("document")
->>> sentenceDetector = SentenceDetector() \
-...     .setInputCols(["document"]) \
-...     .setOutputCol("sentence") \
-...     .setUseAbbreviations(True)
->>> tokenizer = Tokenizer() \
-...     .setInputCols(["sentence"]) \
-...     .setOutputCol("token")
->>> embeddings = WordEmbeddingsModel \
-...     .pretrained("embeddings_clinical", "en", "clinical/models") \
-...     .setInputCols(["sentence", "token"]) \
-...     .setOutputCol("embeddings")
-NER entities:
->>> clinical_sensitive_entities = MedicalNerModel \
-...     .pretrained("ner_deid_enriched", "en", "clinical/models") \
-...     .setInputCols(["sentence", "token", "embeddings"]) \
-...     .setOutputCol("ner")
->>> nerConverter = NerConverter() \
-...     .setInputCols(["sentence", "token", "ner"]) \
-...     .setOutputCol("ner_con")
-Deidentification:
->>> deIdentification = (
-...     DeIdentification()
-...     .setInputCols(["ner_chunk", "token", "sentence"])
-...     .setOutputCol("dei")
-...     # file with custom regex patterns for custom entities
-...     .setRegexPatternsDictionary("path/to/dic_regex_patterns_main_categories.txt")
-...     # file with custom obfuscator names for the entities
-...     .setObfuscateRefFile("path/to/obfuscate_fixed_entities.txt")
-...     .setRefFileFormat("csv")
-...     .setRefSep("#")
-...     .setMode("obfuscate")
-...     .setDateFormats(["MM/dd/yy", "yyyy-MM-dd"])
-...     .setObfuscateDate(True)
-...     .setDateTag("DATE")
-...     .setDays(5)
-...     .setObfuscateRefSource("file")
-... )
-Pipeline:
->>> data = spark.createDataFrame([
-...     ["# 7194334 Date : 01/13/93 PCP : Oliveira , 25 years-old , Record date : 2079-11-09."]
-... ]).toDF("text")
->>> pipeline = Pipeline(stages=[
-...     documentAssembler,
-...     sentenceDetector,
-...     tokenizer,
-...     embeddings,
-...     clinical_sensitive_entities,
-...     nerConverter,
-...     deIdentification
-... ])
->>> result = pipeline.fit(data).transform(data)
->>> result.select("dei.result").show(truncate=False)
- +--------------------------------------------------------------------------------------------------+
- |result |
- +--------------------------------------------------------------------------------------------------+
- |[# 01010101 Date : 01/18/93 PCP : Dr. Gregory House , <AGE> years-old , Record date : 2079-11-14.]|
- +--------------------------------------------------------------------------------------------------+
-
-
-Methods
-
-
-
-
-
-
-__init__()
-
-
-clear(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-fit(dataset[, params])
-Fits a model to the input dataset with optional parameters.
-
-fitMultiple(dataset, paramMaps)
-Fits a model to the input dataset for each param map in paramMaps.
-
-getBlackList()
-
-
-getInputCols()
-Gets current column names of input annotations.
-
-getLazyAnnotator()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol()
-Gets output column name of annotations.
-
-getParam(paramName)
-Gets a param by its name.
-
-getParamValue(paramName)
-Gets the value of a parameter.
-
-hasDefault(param)
-Checks whether a param has a default value.
-
-hasParam(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet(param)
-Checks whether a param is explicitly set by user.
-
-load(path)
-Reads an ML instance from the input path, a shortcut of read().load(path).
-
-read()
-Returns an MLReader instance for this class.
-
-save(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set(param, value)
-Sets a parameter in the embedded param map.
-
-setBlackList(s)
-Sets the list of entities ignored for masking or obfuscation. The default values are: "SSN","PASSPORT","DLN","NPI","C_CARD","IBAN","DEA".
-
-setConsistentObfuscation(s)
-Sets whether to replace very similar entities in a document with the same randomized term (default: true). The similarity is based on the Levenshtein distance between the words.
-
-setDateFormats(s)
-Sets the list of date formats to automatically displace if parsed.
-
-setDateTag(t)
-Sets the tag representing dates in the obfuscate reference file (default: DATE).
-
-setDateToYear(s)
-Sets whether to transform dates into years.
-
-setDays(d)
-Sets the number of days by which to displace the dates when obfuscating.
-
-setFixedMaskLength(length)
-Sets the length of the masking sequence used when the 'fixed_length_chars' masking policy is selected.
-
-setIgnoreRegex(s)
-Sets whether to use the regex file.
-
-setInputCols(*value)
-Sets column names of input annotations.
-
-setIsRandomDateDisplacement(s)
-Sets whether to use random displacement in dates.
-
-setLanguage(l)
-Sets the language used to select the regex file and some faker entities: 'en' (English), 'de' (German) or 'es' (Spanish).
-
-setLazyAnnotator(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMappingsColumn(s)
-Sets the name of the mapping column that will return the Annotation chunks with the fake entities.
-
-setMaskingPolicy(m)
-Sets the masking policy.
-
-setMinYear(s)
-Sets the minimum year to be used when transforming dates into years.
-
-setMode(m)
-Sets the mode for the anonymizer ['mask'|'obfuscate'].
-
-setObfuscateDate(value)
-Sets whether to obfuscate dates or not, when mode=='obfuscate'.
-
-setObfuscateRefFile(f)
-Sets the file with the terms to be used for obfuscation.
-
-setObfuscateRefSource(s)
-Sets the mode for selecting the obfuscation source ['both'|'faker'|'file'].
-
-setOutputCol(value)
-Sets output column name of annotations.
-
-setParamValue(paramName)
-Sets the value of a parameter.
-
-setRefFileFormat(f)
-Sets the format of the reference file.
-
-setRefSep(c)
-Sets the separator character in refFile.
-
-setRegexOverride(s)
-Sets whether to prioritize regex over NER entities.
-
-setRegexPatternsDictionary(path[, read_as, ...])
-Sets the dictionary with regular expression patterns that match some protected entity.
-
-setReturnEntityMappings(s)
-Sets whether to return the mappings column.
-
-setSameEntityThreshold(s)
-Sets the similarity threshold [0.0-1.0] to consider two appearances of an entity as the same (default: 0.9).
-
-setSeed(s)
-Sets the seed to select the entities in obfuscate mode.
-
-write()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-blackList
-
-
-consistentObfuscation
-
-
-dateFormats
-
-
-dateTag
-
-
-dateToYear
-
-
-days
-
-
-fixedMaskLength
-
-
-getter_attrs
-
-
-ignoreRegex
-
-
-inputCols
-
-
-isRandomDateDisplacement
-
-
-language
-
-
-lazyAnnotator
-
-
-mappingsColumn
-
-
-maskingPolicy
-
-
-minYear
-
-
-mode
-
-
-name
-
-
-obfuscateDate
-
-
-obfuscateRefFile
-
-
-obfuscateRefSource
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-refFileFormat
-
-
-refSep
-
-
-regexOverride
-
-
-regexPatternsDictionary
-
-
-returnEntityMappings
-
-
-sameEntityThreshold
-
-
-seed
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams: Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-fit ( dataset , params = None )
-Fits a model to the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame: input dataset.
-
-params dict or list or tuple, optional an optional param map that overrides embedded params. If a list/tuple of
-param maps is given, this calls fit on each param map and returns a list of
-models.
-
-
-
-Returns
-
-Transformer or a list of Transformer: fitted model(s)
-
-
-
-
-
-
-
-
-fitMultiple ( dataset , paramMaps )
-Fits a model to the input dataset for each param map in paramMaps .
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame: input dataset.
-
-paramMaps collections.abc.Sequence: A Sequence of param maps.
-
-
-
-Returns
-
-_FitMultipleIterator: A thread safe iterable which contains one model for each param map. Each
-call to next(modelIterator) will return (index, model) where model was fit
-using paramMaps[index]. index values may not be sequential.
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBlackList ( s ) [source]
-List of entities ignored for masking or obfuscation.
-
-Parameters
-
-s list List of entities ignored for masking or obfuscation. The default values are: "SSN","PASSPORT","DLN","NPI","C_CARD","IBAN","DEA"
-
-
-
-
-
-setConsistentObfuscation ( s ) [source]
-Sets whether to replace very similar entities in a document with the same randomized term (default: true).The similarity is based on the Levenshtein Distance between the words.
-
-Parameters
-
-s str Whether to replace very similar entities in a document with the same randomized term .The similarity is based on the Levenshtein Distance between the words.
-
-
-
-
-
-
-
-
-setDateFormats ( s ) [source]
-Sets list of date formats to automatically displace if parsed
-
-Parameters
-
-name str List of date formats to automatically displace if parsed
-
-
-
-
-
-
-
-
-setDateTag ( t ) [source]
-Sets tag representing dates in the obfuscate reference file (default: DATE)
-
-Parameters
-
-f str Tag representing dates in the obfuscate reference file (default: DATE)
-
-
-
-
-
-
-
-
-setDateToYear ( s ) [source]
-Sets transform dates into years.
-
-Parameters
-
-s bool True if we want the model to transform dates into years, False otherwise.
-
-
-
-
-
-
-
-
-setDays ( d ) [source]
-Sets number of days to obfuscate by displacement the dates.
-
-Parameters
-
-d int Number of days to obfuscate by displacement the dates.
-
-
-
-
-
-
-
-
-setFixedMaskLength ( length ) [source]
-Fixed mask length: this is the length of the masking sequence that will be used when the 'fixed_length_chars' masking policy is selected.
-
-Parameters
-
-length int The mask length
-
-
-
-
-
-
-setIgnoreRegex ( s ) [source]
-Sets if you want to use regex.
-
-Parameters
-
-s bool Whether to use regex.
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setIsRandomDateDisplacement ( s ) [source]
-Sets if you want to use random displacement in dates
-
-Parameters
-
-s bool Boolean value to select if you want to use random displacement in dates
-
-
-
-
-
-
-
-
-setLanguage ( l ) [source]
-The language used to select the regex file and some faker entities: 'en' (English), 'de' (German) or 'es' (Spanish)
-
-Parameters
-
-l str The language used to select the regex file and some faker entities: 'en' (English), 'de' (German) or 'es' (Spanish)
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMappingsColumn ( s ) [source]
-Sets the name of mapping column that will return the Annotations chunks with the fake entities
-
-Parameters
-
-name str Mapping column that will return the Annotations chunks with the fake entities
-
-
-
-
-
-
-
-
-setMaskingPolicy ( m ) [source]
-
-Sets the masking policy: same_length_chars: Replace the obfuscated entity with a masking sequence composed of asterisks and surrounding square brackets, the total length of the masking sequence being the same as that of the original sequence.
-Example: Smith -> [***].
-If the entity is shorter than 3 chars (e.g. 'Jo'), asterisks without brackets will be returned.
-entity_labels: Replace the values with the corresponding entity labels.
-fixed_length_chars: Replace the obfuscated entity with a masking sequence composed of a fixed number of asterisks.
-
-
-
-Parameters
-
-m str The masking policy
-
-
-
-
-
-
-
-
-setMinYear ( s ) [source]
-Sets minimum year to be used when transforming dates into years.
-
-Parameters
-
-s int Minimum year to be used when transforming dates into years.
-
-
-
-
-
-
-
-
-setMode ( m ) [source]
-Sets mode for Anonimizer [‘mask’|’obfuscate’]
-
-Parameters
-
-m str Mode for Anonimizer [‘mask’|’obfuscate’]
-
-
-
-
-
-
-
-
-setObfuscateDate ( value ) [source]
-Sets whether to obfuscate dates or not, when mode=='obfuscate'.
-
-Parameters
-
-value bool When mode=='obfuscate', whether to obfuscate dates or not. This param helps in consistency to make dateFormats more visible.
-When setting to true, make sure the dateFormats param fits the needs (default: false).
-When set to false, the date will be masked to <DATE>.
-
-
-
-
-
-
-
-
-setObfuscateRefFile ( f ) [source]
-Sets the file with the terms to be used for obfuscation.
-
-Parameters
-
-f str File with the terms to be used for obfuscation
-
-
-
-
-
-setObfuscateRefSource ( s ) [source]
-Sets the mode for selecting the obfuscation source ['both'|'faker'|'file']
-
-Parameters
-
-s str Mode for selecting the obfuscation source ['both'|'faker'|'file']
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setRefFileFormat ( f ) [source]
-Sets format of the reference file
-
-Parameters
-
-f str Format of the reference file
-
-
-
-
-
-
-
-
-setRefSep ( c ) [source]
-Sets separator character in refFile
-
-Parameters
-
-f str Separator character in refFile
-
-
-
-
-
-
-
-
-setRegexOverride ( s ) [source]
-Sets whether to prioritize regex over ner entities
-
-Parameters
-
-s bool Whether to prioritize regex over ner entities
-
-
-
-
-
-
-
-
-setRegexPatternsDictionary ( path , read_as = 'TEXT' , options = {'delimiter': ' '} ) [source]
-Sets the dictionary with regular expression patterns that match some protected entity
-
-Parameters
-
-path str Path where the dictionary is located
-
-read_as: ReadAs Format of the file
-
-options: dict Dictionary with the options to read the file.
-
-
-
-
-
-
-
-
-setReturnEntityMappings ( s ) [source]
-Sets if you want to return mapping column
-
-Parameters
-
-s bool Whether to return the mappings column.
-
-
-
-
-
-
-
-
-setSameEntityThreshold ( s ) [source]
-Sets similarity threshold [0.0-1.0] to consider two appearances of an entity as the same (default: 0.9).
-
-Parameters
-
-s float Similarity threshold [0.0-1.0] to consider two appearances of an entity as the same (default: 0.9).
-
-
-
-
-
-
-
-
-setSeed ( s ) [source]
-Sets the seed to select the entities on obfuscate mode
-
-Parameters
-
-s int The seed to select the entities on obfuscate mode
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DeIdentificationModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DeIdentificationModel.html
deleted file mode 100644
index 785b63e915..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DeIdentificationModel.html
+++ /dev/null
@@ -1,1543 +0,0 @@
-sparknlp_jsl.annotator.DeIdentificationModel — Spark NLP 3.3.0 documentation
-sparknlp_jsl.annotator.DeIdentificationModel
-
-
-class sparknlp_jsl.annotator.DeIdentificationModel(classname='com.johnsnowlabs.nlp.annotators.deid.DeIdentificationModel', java_model=None) [source]
-Bases: sparknlp.common.AnnotatorModel
-The DeIdentificationModel can obfuscate or mask entities that contain personal information. These can be set with a file of
-regex patterns with setRegexPatternsDictionary, where each line is a mapping of entity to regex.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, CHUNK, TOKEN
-DOCUMENT
-
-
-
-
-Parameters
-
-regexPatternsDictionary Dictionary with regular expression patterns that match some protected entity
-
-mode Mode for the anonymizer ['mask'|'obfuscate']
-
-obfuscateDate When mode=='obfuscate', whether to obfuscate dates or not. This param helps in consistency to make dateFormats more visible. When setting to true, make sure the dateFormats param fits the needs (default: false)
-
-dateTag Tag representing dates in the obfuscate reference file (default: DATE)
-
-days Number of days to obfuscate the dates by displacement. If not provided, a random integer between 1 and 60 will be used
-
-dateToYear True if we want the model to transform dates into years, False otherwise.
-
-minYear Minimum year to be used when transforming dates into years.
-
-dateFormats List of date formats to automatically displace if parsed
-
-consistentObfuscation Whether to replace very similar entities in a document with the same randomized term (default: true).
-The similarity is based on the Levenshtein distance between the words.
-
-sameEntityThreshold Similarity threshold [0.0-1.0] to consider two appearances of an entity as the same (default: 0.9).
-
-obfuscateRefSource The source used to obfuscate the entities. For date entities this method does not apply.
-The values are the following:
-file: Takes the entities from the obfuscatorRefFile
-faker: Takes the entities from the Faker module
-both: Takes the entities from the obfuscatorRefFile and the Faker module randomly.
-
-regexOverride If true, prioritize the regex entities; if false, prioritize the NER entities.
-
-seed The seed used to select the entities in obfuscate mode. With the seed you can replay an execution several times with the same output.
-
-ignoreRegex Select if you want to use the regex file loaded in the model. If true, the default regex file will not be used. The default value is false.
-
-isRandomDateDisplacement Use a random number of displacement days in date entities; that random number is based on [[DeIdentificationParams.seed]].
-If true, use a random number of displacement days in date entities; if false, use [[DeIdentificationParams.days]].
-The default value is false.
-
-mappingsColumn This is the mapping column that will return the Annotation chunks with the fake entities.
-
-returnEntityMappings With this property you select whether to return the mappings column
-
-blackList List of entities ignored for masking or obfuscation. The default values are: "SSN","PASSPORT","DLN","NPI","C_CARD","IBAN","DEA"
-
-regexEntities Keeps the regex entities used in the regexPatternsDictionary
-
-maskingPolicy
-Select the masking policy: same_length_chars: Replace the obfuscated entity with a masking sequence composed of asterisks and surrounding square brackets, the total length of the masking sequence being the same as that of the original sequence.
-Example: Smith -> [***].
-If the entity is shorter than 3 chars (e.g. 'Jo'), asterisks without brackets will be returned.
-entity_labels: Replace the values with the corresponding entity labels.
-fixed_length_chars: Replace the obfuscated entity with a masking sequence composed of a fixed number of asterisks.
-
-fixedMaskLength The length of the masking sequence that will be used when the 'fixed_length_chars' masking policy is selected.
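-
-A minimal sketch of running the pretrained model in "mask" mode instead of the "obfuscate" mode shown in the example below (same pipeline stages assumed; the <DOCTOR> label is illustrative):
->>> deid_mask = DeIdentificationModel.pretrained("deidentify_large", "en", "clinical/models") \
-...     .setInputCols(["ner_chunk", "token", "sentence"]) \
-...     .setOutputCol("dei") \
-...     .setMode("mask") \
-...     .setMaskingPolicy("entity_labels")
->>> # Each detected chunk would then be replaced by its label, e.g. "Oliveira" -> "<DOCTOR>".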
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> documentAssembler = DocumentAssembler() \
-...     .setInputCol("text") \
-...     .setOutputCol("document")
->>> sentenceDetector = SentenceDetector() \
-...     .setInputCols(["document"]) \
-...     .setOutputCol("sentence") \
-...     .setUseAbbreviations(True)
->>> tokenizer = Tokenizer() \
-...     .setInputCols(["sentence"]) \
-...     .setOutputCol("token")
->>> embeddings = WordEmbeddingsModel \
-...     .pretrained("embeddings_clinical", "en", "clinical/models") \
-...     .setInputCols(["sentence", "token"]) \
-...     .setOutputCol("embeddings")
-NER entities:
->>> clinical_sensitive_entities = MedicalNerModel \
-...     .pretrained("ner_deid_enriched", "en", "clinical/models") \
-...     .setInputCols(["sentence", "token", "embeddings"]) \
-...     .setOutputCol("ner")
->>> nerConverter = NerConverter() \
-...     .setInputCols(["sentence", "token", "ner"]) \
-...     .setOutputCol("ner_con")
-Deidentification:
->>> deIdentification = DeIdentificationModel.pretrained("deidentify_large", "en", "clinical/models") \
-...     .setInputCols(["ner_chunk", "token", "sentence"]) \
-...     .setOutputCol("dei") \
-...     .setMode("obfuscate") \
-...     .setDateFormats(["MM/dd/yy", "yyyy-MM-dd"]) \
-...     .setObfuscateDate(True) \
-...     .setDateTag("DATE") \
-...     .setDays(5) \
-...     .setObfuscateRefSource("both")
->>> data = spark.createDataFrame([
-...     ["# 7194334 Date : 01/13/93 PCP : Oliveira , 25 years-old , Record date : 2079-11-09."]
-... ]).toDF("text")
->>> pipeline = Pipeline(stages=[
-...     documentAssembler,
-...     sentenceDetector,
-...     tokenizer,
-...     embeddings,
-...     clinical_sensitive_entities,
-...     nerConverter,
-...     deIdentification
-... ])
->>> result = pipeline.fit(data).transform(data)
->>> result.select("dei.result").show(truncate=False)
- +--------------------------------------------------------------------------------------------------+
- |result |
- +--------------------------------------------------------------------------------------------------+
- |[# 01010101 Date : 01/18/93 PCP : Dr. Gregory House , <AGE> years-old , Record date : 2079-11-14.]|
- +--------------------------------------------------------------------------------------------------+
-
-
-Methods
-
-
-
-
-
-
-__init__([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getBlackList()
-
-
-getInputCols()
-Gets current column names of input annotations.
-
-getLazyAnnotator()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol()
-Gets output column name of annotations.
-
-getParam(paramName)
-Gets a param by its name.
-
-getParamValue(paramName)
-Gets the value of a parameter.
-
-getRegexEntities()
-
-
-hasDefault(param)
-Checks whether a param has a default value.
-
-hasParam(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet(param)
-Checks whether a param is explicitly set by user.
-
-load(path)
-Reads an ML instance from the input path, a shortcut of read().load(path).
-
-pretrained(name[, lang, remote_loc])
-
-
-read()
-Returns an MLReader instance for this class.
-
-save(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set(param, value)
-Sets a parameter in the embedded param map.
-
-setBlackList(s)
-Sets the list of entities ignored for masking or obfuscation. The default values are: "SSN","PASSPORT","DLN","NPI","C_CARD","IBAN","DEA".
-
-setConsistentObfuscation(s)
-Sets whether to replace very similar entities in a document with the same randomized term (default: true). The similarity is based on the Levenshtein distance between the words.
-
-setDateFormats(s)
-Sets the list of date formats to automatically displace if parsed.
-
-setDateTag(t)
-Sets the tag representing dates in the obfuscate reference file (default: DATE).
-
-setDateToYear(s)
-Sets whether to transform dates into years.
-
-setDays(d)
-Sets the number of days by which to displace the dates when obfuscating.
-
-setFixedMaskLength(length)
-Sets the length of the masking sequence used when the 'fixed_length_chars' masking policy is selected.
-
-setIgnoreRegex(s)
-Sets whether to use the regex file.
-
-setInputCols(*value)
-Sets column names of input annotations.
-
-setIsRandomDateDisplacement(s)
-Sets whether to use random displacement in dates.
-
-setLanguage(l)
-Sets the language used to select the regex file and some faker entities: 'en' (English), 'de' (German) or 'es' (Spanish).
-
-setLazyAnnotator(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMappingsColumn(s)
-Sets the name of the mapping column that will return the Annotation chunks with the fake entities.
-
-setMaskingPolicy(m)
-Sets the masking policy.
-
-setMinYear(s)
-Sets the minimum year to be used when transforming dates into years.
-
-setMode(m)
-Sets the mode for the anonymizer ['mask'|'obfuscate'].
-
-setObfuscateDate(value)
-Sets whether to obfuscate dates or not, when mode=='obfuscate'.
-
-setObfuscateRefSource(s)
-Sets the mode for selecting the obfuscation source ['both'|'faker'|'file'].
-
-setOutputCol(value)
-Sets output column name of annotations.
-
-setParamValue(paramName)
-Sets the value of a parameter.
-
-setParams()
-
-
-setRegexOverride(s)
-Sets whether to prioritize regex over NER entities.
-
-setReturnEntityMappings(s)
-Sets whether to return the mappings column.
-
-setSameEntityThreshold(s)
-Sets the similarity threshold [0.0-1.0] to consider two appearances of an entity as the same (default: 0.9).
-
-setSeed(s)
-Sets the seed to select the entities in obfuscate mode.
-
-transform(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-blackList
-
-
-consistentObfuscation
-
-
-dateFormats
-
-
-dateTag
-
-
-dateToYear
-
-
-days
-
-
-fixedMaskLength
-
-
-getter_attrs
-
-
-ignoreRegex
-
-
-inputCols
-
-
-isRandomDateDisplacement
-
-
-language
-
-
-lazyAnnotator
-
-
-mappingsColumn
-
-
-maskingPolicy
-
-
-minYear
-
-
-mode
-
-
-name
-
-
-obfuscateDate
-
-
-obfuscateRefSource
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-regexEntities
-
-
-regexOverride
-
-
-returnEntityMappings
-
-
-sameEntityThreshold
-
-
-seed
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams: Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBlackList ( s ) [source]
-List of entities ignored for masking or obfuscation.
-
-Parameters
-
-s list List of entities ignored for masking or obfuscation. The default values are: "SSN","PASSPORT","DLN","NPI","C_CARD","IBAN","DEA"
-
-
-
-
-
-setConsistentObfuscation ( s ) [source]
-Sets whether to replace very similar entities in a document with the same randomized term (default: true).The similarity is based on the Levenshtein Distance between the words.
-
-Parameters
-
-s str Whether to replace very similar entities in a document with the same randomized term (default: true).The similarity is based on the Levenshtein Distance between the words.
-
-
-
-
-
-
-
-
-setDateFormats ( s ) [source]
-Sets list of date formats to automatically displace if parsed
-
-Parameters
-
-s str List of date formats to automatically displace if parsed
-
-
-
-
-
-
-
-
-setDateTag ( t ) [source]
-Sets the tag representing dates in the obfuscate reference file (default: DATE)
-
-Parameters
-
-t str Tag representing dates in the obfuscate reference file (default: DATE)
-
-
-
-
-
-
-
-
-setDateToYear ( s ) [source]
-Sets transform dates into years.
-
-Parameters
-
-s bool True if we want the model to transform dates into years, False otherwise.
-
-
-
-
-
-
-
-
-setDays ( d ) [source]
-Sets the number of days by which to displace dates during obfuscation.
-
-Parameters
-
-d int Number of days by which to displace dates during obfuscation
-
-
-
-
-
-
-
-
-setFixedMaskLength ( length ) [source]
-Sets the fixed mask length: the length of the masking sequence used when the 'fixed_length_chars' masking policy is selected.
-
-Parameters
-
-length int The mask length
-
-
-
-
-
-
-setIgnoreRegex ( s ) [source]
-Sets whether to ignore regex patterns when detecting entities.
-
-Parameters
-
-s bool Whether to ignore regex patterns
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setIsRandomDateDisplacement ( s ) [source]
-Sets whether to use a random displacement for dates.
-
-Parameters
-
-s bool Whether to use a random displacement for dates
-
-
-
-
-
-
-
-
-setLanguage ( l ) [source]
-Sets the language used to select the regex file and some faker entities: 'en' (English), 'de' (German) or 'es' (Spanish).
-
-Parameters
-
-l str The language used to select the regex file and some faker entities: 'en' (English), 'de' (German) or 'es' (Spanish)
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMappingsColumn ( s ) [source]
-Sets the name of the mapping column that will return the Annotation chunks with the fake entities.
-
-Parameters
-
-s str Name of the mapping column that will return the Annotation chunks with the fake entities
-
-
-
-
-
-
-
-
-setMaskingPolicy ( m ) [source]
-Sets the masking policy:
-same_length_chars: Replace the obfuscated entity with a masking sequence of asterisks surrounded by square brackets, the total length of the masking sequence matching the length of the original sequence.
-Example: Smith -> [***].
-If the entity is shorter than 3 chars (like Jo, or 5), asterisks without brackets are returned.
-entity_labels: Replace the values with the corresponding entity labels.
-fixed_length_chars: Replace the obfuscated entity with a masking sequence composed of a fixed number of asterisks.
-
-Parameters
-
-m str The masking policy
-
-
-
-
-
-
-
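-A minimal sketch of selecting a masking policy (the ``DeIdentification`` instance and its input column names are illustrative assumptions):
->>> deid = DeIdentification () \
-... . setInputCols ([ "sentence" , "token" , "ner_chunk" ]) \
-... . setOutputCol ( "deidentified" ) \
-... . setMode ( "mask" ) \
-... . setMaskingPolicy ( "fixed_length_chars" ) \
-... . setFixedMaskLength ( 4 )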
-
-setMinYear ( s ) [source]
-Sets minimum year to be used when transforming dates into years.
-
-Parameters
-
-s int Minimum year to be used when transforming dates into years.
-
-
-
-
-
-
-
-
-setMode ( m ) [source]
-Sets the mode for the Anonymizer ['mask'|'obfuscate'].
-
-Parameters
-
-m str Mode for the Anonymizer ['mask'|'obfuscate']
-
-
-
-
-
-
-
-
-setObfuscateDate ( value ) [source]
-Sets whether to obfuscate dates when mode is "obfuscate".
-
-Parameters
-
-value bool When mode == "obfuscate", whether to obfuscate dates or not. This param helps in consistency to make dateFormats more visible.
-When set to true, make sure the dateFormats param fits the needs (default: false).
-When set to false, dates will be masked to <DATE>.
-
-
-
-
-
-
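-A minimal sketch of date obfuscation (reusing the ``deid`` instance from above; the date formats and day count are illustrative assumptions):
->>> deid = deid \
-... . setMode ( "obfuscate" ) \
-... . setObfuscateDate ( True ) \
-... . setDateFormats ([ "dd/MM/yyyy" , "yyyy-MM-dd" ]) \
-... . setDays ( 5 )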
-
-
-setObfuscateRefSource ( s ) [source]
-Sets the source of obfuscation terms ['both'|'faker'|'file'].
-
-Parameters
-
-s str Source of obfuscation terms ['both'|'faker'|'file']
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setRegexOverride ( s ) [source]
-Sets whether to prioritize regex over NER entities.
-
-Parameters
-
-s bool Whether to prioritize regex over NER entities
-
-
-
-
-
-
-
-
-setReturnEntityMappings ( s ) [source]
-Sets whether to return the mappings column.
-
-Parameters
-
-s bool Whether to save the mappings column
-
-
-
-
-
-
-
-
-setSameEntityThreshold ( s ) [source]
-Sets similarity threshold [0.0-1.0] to consider two appearances of an entity as the same (default: 0.9).
-
-Parameters
-
-s float Similarity threshold [0.0-1.0] to consider two appearances of an entity as the same (default: 0.9).
-
-
-
-
-
-
-
-
-setSeed ( s ) [source]
-Sets the seed used to select entities in obfuscate mode.
-
-Parameters
-
-s int The seed used to select entities in obfuscate mode
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
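-A quick sketch of applying a fitted pipeline (``model``, ``data`` and the output column name are illustrative assumptions):
->>> result = model . transform ( data )
->>> result . select ( "deidentified.result" ) . show ( truncate = False )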
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DocumentLogRegClassifierApproach.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DocumentLogRegClassifierApproach.html
deleted file mode 100644
index 0d47a35c70..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DocumentLogRegClassifierApproach.html
+++ /dev/null
@@ -1,1232 +0,0 @@
sparknlp_jsl.annotator.DocumentLogRegClassifierApproach — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.DocumentLogRegClassifierApproach
-
-
-class sparknlp_jsl.annotator. DocumentLogRegClassifierApproach [source]
-Bases: sparknlp.common.AnnotatorApproach
-Trains a model to classify documents with a Logistic Regression algorithm. Training data requires columns for
-text and their label. The result is a trained DocumentLogRegClassifierModel.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-TOKEN
-CATEGORY
-
-
-
-
-Parameters
-
-labelCol Column with the value result we are trying to predict.
-
-maxIter maximum number of iterations.
-
-tol convergence tolerance after each iteration.
-
-fitIntercept whether to fit an intercept term, default is true.
-
-labels array to output the label in the original form.
-
-vectorizationModelPath specify the vectorization model if it has been already trained.
-
-classificationModelPath specify the classification model if it has been already trained.
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
-
-
-An example pipeline could then be defined like this
->>> document_assembler = DocumentAssembler () \
-... . setInputCol ( "text" ) \
-... . setOutputCol ( "document" )
-...
->>> tokenizer = Tokenizer () \
-... . setInputCols ( "document" ) \
-... . setOutputCol ( "token" )
-...
->>> normalizer = Normalizer () \
-... . setInputCols ( "token" ) \
-... . setOutputCol ( "normalized" )
-...
->>> stopwords_cleaner = StopWordsCleaner () \
-... . setInputCols ( "normalized" ) \
-... . setOutputCol ( "cleanTokens" ) \
-... . setCaseSensitive ( False )
-...
->>> stemmer = Stemmer () \
-... . setInputCols ( "cleanTokens" ) \
-... . setOutputCol ( "stem" )
-...
->>> gen_clf = DocumentLogRegClassifierApproach () \
-... . setLabelColumn ( "category" ) \
-... . setInputCols ( "stem" ) \
-... . setOutputCol ( "prediction" )
-...
->>> pipeline = Pipeline () . setStages ([
-... document_assembler ,
-... tokenizer ,
-... normalizer ,
-... stopwords_cleaner ,
-... stemmer ,
-... gen_clf
-...])
-...
->>> clf_model = pipeline . fit ( data )
-
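-A minimal sketch of reusing previously trained components instead of retraining them (the paths are illustrative assumptions):
->>> logreg_pretrained = DocumentLogRegClassifierApproach () \
-... . setLabelColumn ( "category" ) \
-... . setInputCols ( "stem" ) \
-... . setOutputCol ( "prediction" ) \
-... . setVectorizationModelPath ( "/path/to/vectorization_model" ) \
-... . setClassificationModelPath ( "/path/to/classification_model" )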
-
-Methods
-
-
-
-
-
-
-__init__
()
-
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-fit
(dataset[, params])
-Fits a model to the input dataset with optional parameters.
-
-fitMultiple
(dataset, paramMaps)
-Fits a model to the input dataset for each param map in paramMaps .
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setClassificationModelPath
(value)
-Sets a path to the classification model if it has already been trained.
-
-setFitIntercept
(merge)
-Sets whether to fit an intercept term, default is true.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLabelColumn
(label)
-Sets column with the value result we are trying to predict.
-
-setLabels
(value)
-Sets array to output the label in the original form.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMaxIter
(k)
-Sets maximum number of iterations.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setTol
(dist)
-Sets convergence tolerance after each iteration.
-
-setVectorizationModelPath
(value)
-Sets a path to the vectorization model if it has already been trained.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-classificationModelPath
-
-
-fitIntercept
-
-
-getter_attrs
-
-
-inputCols
-
-
-labelCol
-
-
-labels
-
-
-lazyAnnotator
-
-
-maxIter
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-tol
-
-
-vectorizationModelPath
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-fit ( dataset , params = None )
-Fits a model to the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-params dict or list or tuple, optional an optional param map that overrides embedded params. If a list/tuple of
-param maps is given, this calls fit on each param map and returns a list of
-models.
-
-
-
-Returns
-
-Transformer
or a list of Transformer
fitted model(s)
-
-
-
-
-
-
-
-
-fitMultiple ( dataset , paramMaps )
-Fits a model to the input dataset for each param map in paramMaps .
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-paramMaps collections.abc.Sequence
A Sequence of param maps.
-
-
-
-Returns
-
-_FitMultipleIterator
A thread safe iterable which contains one model for each param map. Each
-call to next(modelIterator) will return (index, model) where model was fit
-using paramMaps[index] . index values may not be sequential.
-
-
-
-
-
-
-
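-A quick sketch of fitting one model per param map (``train_df`` and the param values are illustrative assumptions):
->>> maps = [{ gen_clf . maxIter : 10 }, { gen_clf . maxIter : 100 }]
->>> for index , model in gen_clf . fitMultiple ( train_df , maps ):
-... print ( index )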
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setClassificationModelPath ( value ) [source]
-Sets a path to the classification model if it has already been trained.
-
-Parameters
-
-value str Path to the classification model if it has already been trained
-
-
-
-
-
-
-
-
-setFitIntercept ( merge ) [source]
-Sets whether to fit an intercept term (default: true).
-
-Parameters
-
-merge bool Whether to fit an intercept term (default: true)
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLabelColumn ( label ) [source]
-Sets column with the value result we are trying to predict.
-
-Parameters
-
-label str Column with the value result we are trying to predict.
-
-
-
-
-
-
-
-
-setLabels ( value ) [source]
-Sets the array to output the label in the original form.
-
-Parameters
-
-value list Array to output the label in the original form
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMaxIter ( k ) [source]
-Sets maximum number of iterations.
-
-Parameters
-
-k int Maximum number of iterations.
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setTol ( dist ) [source]
-Sets convergence tolerance after each iteration.
-
-Parameters
-
-dist float Convergence tolerance after each iteration.
-
-
-
-
-
-
-
-
-setVectorizationModelPath ( value ) [source]
-Sets a path to the vectorization model if it has already been trained.
-
-Parameters
-
-value str Path to the vectorization model if it has already been trained
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DocumentLogRegClassifierModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DocumentLogRegClassifierModel.html
deleted file mode 100644
index 2e88ca74be..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DocumentLogRegClassifierModel.html
+++ /dev/null
@@ -1,1097 +0,0 @@
sparknlp_jsl.annotator.DocumentLogRegClassifierModel — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.DocumentLogRegClassifierModel
-
-
-class sparknlp_jsl.annotator. DocumentLogRegClassifierModel ( classname = 'com.johnsnowlabs.nlp.annotators.classification.DocumentLogRegClassifierModel' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
-Classifies documents with a Logistic Regression algorithm.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-TOKEN
-CATEGORY
-
-
-
-
-Parameters
-
-mergeChunks Whether to merge all chunks in a document or not (Default: false)
-
-labels Array to output the label in the original form.
-
-vectorizationModel Vectorization model if it has been already trained.
-
-classificationModel Classification model if it has been already trained.
-
-
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-pretrained
(name[, lang, remote_loc])
-
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setClassificationModel
(merge)
-Sets the classification model if it has already been trained.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLabels
(value)
-Sets array to output the label in the original form.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMergeChunks
(merge)
-Sets whether to merge all chunks in a document or not (Default: false)
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-setVectorizationModel
(merge)
-Sets the vectorization model if it has already been trained.
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-classificationModel
-
-
-getter_attrs
-
-
-inputCols
-
-
-labels
-
-
-lazyAnnotator
-
-
-mergeChunks
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-vectorizationModel
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setClassificationModel ( merge ) [source]
-Sets the classification model if it has already been trained.
-
-Parameters
-
-merge: :class:`pyspark.ml.PipelineModel` Classification model if it has already been trained
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLabels ( value ) [source]
-Sets the array to output the label in the original form.
-
-Parameters
-
-value list Array to output the label in the original form
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMergeChunks ( merge ) [source]
-Sets whether to merge all chunks in a document or not (Default: false).
-
-Parameters
-
-merge bool Whether to merge all chunks in a document or not (Default: false)
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setVectorizationModel ( merge ) [source]
-Sets the vectorization model if it has already been trained.
-
-Parameters
-
-merge: :class:`pyspark.ml.PipelineModel` Vectorization model if it has already been trained
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DrugNormalizer.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DrugNormalizer.html
deleted file mode 100644
index 655a68a2a4..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.DrugNormalizer.html
+++ /dev/null
@@ -1,1072 +0,0 @@
sparknlp_jsl.annotator.DrugNormalizer — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.DrugNormalizer
-
-
-class sparknlp_jsl.annotator. DrugNormalizer [source]
-Bases: sparknlp.common.AnnotatorModel
-
-Annotator which normalizes raw text from clinical documents, e.g. scraped web pages or XML documents, from document-type columns into Sentence. Removes all dirty characters from text following one or more input regex patterns.
-Can apply unwanted-character removal with a specific policy.
-Can apply lowercase normalization.
-
-
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT
-DOCUMENT
-
-
-
-
-Parameters
-
-lowercase Whether to convert strings to lowercase
-
-policy Policy to remove patterns from text. Defaults to "all"
-
-
-
-
-Examples
->>> data = spark . createDataFrame ([
-... [ "Sodium Chloride/Potassium Chloride 13bag" ],
-... [ "interferon alfa-2b 10 million unit ( 1 ml ) injec" ],
-... [ "aspirin 10 meq/ 5 ml oral sol" ]
-... ]) . toDF ( "text" )
->>> document = DocumentAssembler () . setInputCol ( "text" ) . setOutputCol ( "document" )
->>> drugNormalizer = DrugNormalizer () . setInputCols ([ "document" ]) . setOutputCol ( "document_normalized" )
->>> trainingPipeline = Pipeline ( stages = [ document , drugNormalizer ])
->>> result = trainingPipeline . fit ( data ) . transform ( data )
->>> result . selectExpr ( "explode(document_normalized.result) as normalized_text" ) . show ( truncate = False )
-+----------------------------------------------------+
-|normalized_text |
-+----------------------------------------------------+
-|Sodium Chloride / Potassium Chloride 13 bag |
-|interferon alfa - 2b 10000000 unt ( 1 ml ) injection|
-|aspirin 2 meq/ml oral solution |
-+----------------------------------------------------+
-
-
-Methods
-
-
-
-
-
-
-__init__
()
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setLowercase
(value)
-Sets whether to convert strings to lowercase
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-setPolicy
(value)
-Sets policy to remove patterns from text.
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-lowercase
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-policy
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setLowercase ( value ) [source]
-Sets whether to convert strings to lowercase.
-
-Parameters
-
-value bool Whether to convert strings to lowercase
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setPolicy ( value ) [source]
-Sets the policy to remove patterns from text.
-
-Parameters
-
-value str Policy to remove patterns from text
-
-
-
-
-
-
-
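-A minimal sketch of a non-default configuration (the "abbreviations" policy value is an illustrative assumption; "all" is the documented default):
->>> drug_normalizer = DrugNormalizer () \
-... . setInputCols ([ "document" ]) \
-... . setOutputCol ( "document_normalized" ) \
-... . setPolicy ( "abbreviations" ) \
-... . setLowercase ( True )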
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.EntityChunkEmbeddings.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.EntityChunkEmbeddings.html
deleted file mode 100644
index d3b57419a6..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.EntityChunkEmbeddings.html
+++ /dev/null
@@ -1,1356 +0,0 @@
sparknlp_jsl.annotator.EntityChunkEmbeddings — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.EntityChunkEmbeddings
-
-
-class sparknlp_jsl.annotator. EntityChunkEmbeddings ( classname = 'com.johnsnowlabs.nlp.annotators.embeddings.EntityChunkEmbeddings' , java_model = None ) [source]
-Bases: sparknlp.annotator.BertSentenceEmbeddings
-
-Weighted average embeddings of multiple named-entity chunk annotations.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DEPENDENCY, CHUNK
-SENTENCE_EMBEDDINGS
-
-
-
-
-
-Parameters
-
-targetEntities Target entities and their related entities
-
-entityWeights Relative weights of entities.
-
-maxSyntacticDistance Maximal syntactic distance between related entities. Default value is 2.
-
-
-
-
-
-
-
-
-
-
-
-
-result drug_embedding
-
-metformin 125 mg [-0.267413, 0.07614058, -0.5620966, 0.83838946, 0.8911504]
-250 mg coumadin [0.22319649, -0.07094894, -0.6885556, 0.79176235, 0.82672405]
-one pill paracetamol [-0.10939768, -0.29242, -0.3574444, 0.3981813, 0.79609615]
-
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getBatchSize
()
-Gets current batch size.
-
-getCaseSensitive
()
-Gets whether to ignore case in tokens for embeddings matching.
-
-getDimension
()
-Gets embeddings dimension.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-getStorageRef
()
-Gets unique reference name for identification.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-loadSavedModel
(folder, spark_session)
-Loads a locally saved model.
-
-pretrained
([name, lang, remote_loc])
-Downloads and loads a pretrained model.
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setBatchSize
(v)
-Sets batch size.
-
-setCaseSensitive
(value)
-Sets whether to ignore case in tokens for embeddings matching.
-
-setConfigProtoBytes
(b)
-Sets configProto from tensorflow, serialized into byte array.
-
-setDimension
(value)
-Sets embeddings dimension.
-
-setEntityWeights
([weights])
-Sets the relative weights of the embeddings of specific entities. By default the dictionary is empty and all entities have equal weights.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setIsLong
(value)
-Sets whether to use Long type instead of Int type for inputs buffer.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMaxSentenceLength
(value)
-Sets max sentence length to process.
-
-setMaxSyntacticDistance
(distance)
-Sets the maximal syntactic distance between related entities. Default value is 2.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-setStorageRef
(value)
-Sets unique reference name for identification.
-
-setTargetEntities
([entities])
-Sets the target entities and maps them to their related entities.
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-batchSize
-
-
-caseSensitive
-
-
-configProtoBytes
-
-
-dimension
-
-
-entityWeights
-
-
-getter_attrs
-
-
-inputCols
-
-
-isLong
-
-
-lazyAnnotator
-
-
-maxSentenceLength
-
-
-maxSyntacticDistance
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-storageRef
-
-
-targetEntities
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getBatchSize ( )
-Gets current batch size.
-
-Returns
-
-int Current batch size
-
-
-
-
-
-
-
-
-getCaseSensitive ( )
-Gets whether to ignore case in tokens for embeddings matching.
-
-Returns
-
-bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-getDimension ( )
-Gets embeddings dimension.
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-getStorageRef ( )
-Gets unique reference name for identification.
-
-Returns
-
-str Unique reference name for identification
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-static loadSavedModel ( folder , spark_session )
-Loads a locally saved model.
-
-Parameters
-
-folder str Folder of the saved model
-
-spark_session pyspark.sql.SparkSession The current SparkSession
-
-
-
-Returns
-
-BertSentenceEmbeddings The restored model
-
-
-
-
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-static pretrained ( name = 'sbiobert_base_cased_mli' , lang = 'en' , remote_loc = 'clinical/models' ) [source]
-Downloads and loads a pretrained model.
-
-Parameters
-
-name str, optional Name of the pretrained model, by default "sbiobert_base_cased_mli"
-
-lang str, optional Language of the pretrained model, by default "en"
-
-remote_loc str, optional Optional remote address of the resource, by default None. Will use
-Spark NLP's repositories otherwise.
-
-
-
-Returns
-
-BertSentenceEmbeddings The restored model
-
-
-
-
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBatchSize ( v )
-Sets batch size.
-
-Parameters
-
-v int Batch size
-
-
-
-
-
-
-
-
-setCaseSensitive ( value )
-Sets whether to ignore case in tokens for embeddings matching.
-
-Parameters
-
-value bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-setConfigProtoBytes ( b )
-Sets configProto from tensorflow, serialized into byte array.
-
-Parameters
-
-b List[str] ConfigProto from tensorflow, serialized into byte array
-
-
-
-
-
-
-
-
-setDimension ( value )
-Sets embeddings dimension.
-
-Parameters
-
-value int Embeddings dimension
-
-
-
-
-
-
-
-
-setEntityWeights ( weights = {} ) [source]
-Sets the relative weights of the embeddings of specific entities. By default the dictionary is empty and all entities have equal weights. If non-empty and some entity is not in it, then its weight is set to 0.
-
-Parameters
-
-weights: dict[str, float] Dictionary with the relative weights of entities. The notation TARGET_ENTITY:RELATED_ENTITY can be used to
-specify the weight of an entity which is related to a specific target entity (e.g. "DRUG:SYMPTOM": 0.3).
-Entity names are case insensitive.
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setIsLong ( value )
-Sets whether to use Long type instead of Int type for inputs buffer.
-Some Bert models require Long instead of Int.
-
-Parameters
-
-value bool Whether to use Long type instead of Int type for inputs buffer
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMaxSentenceLength ( value )
-Sets max sentence length to process.
-
-Parameters
-
-value int Max sentence length to process
-
-
-
-
-
-
-
-
-setMaxSyntacticDistance ( distance ) [source]
-Sets the maximal syntactic distance between related entities. Default value is 2.
-
-Parameters
-
-distance int Maximal syntactic distance
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setStorageRef ( value )
-Sets unique reference name for identification.
-
-Parameters
-
-value str Unique reference name for identification
-
-
-
-
-
-
-
-
-setTargetEntities ( entities = {} ) [source]
-Sets the target entities and maps them to their related entities. A target entity with an empty list of
-related entities means all other entities are assumed to be related to it.
-
-Parameters
-
-entities: dict[str, list[str]] Dictionary with target and related entities (TARGET: [RELATED1, RELATED2,…]). If the list of related
-entities is empty, then all non-target entities are considered.
-Entity names are case insensitive.
-
-
-
-
-
-
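-A minimal sketch combining target entities, weights and syntactic distance (the input column names are illustrative assumptions):
->>> entity_embeddings = EntityChunkEmbeddings . pretrained () \
-... . setInputCols ([ "dependencies" , "drug_chunks" ]) \
-... . setOutputCol ( "drug_embeddings" ) \
-... . setTargetEntities ({ "DRUG" : []}) \
-... . setEntityWeights ({ "DRUG" : 0.8 , "DRUG:SYMPTOM" : 0.2 }) \
-... . setMaxSyntacticDistance ( 3 )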
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.GenericClassifierApproach.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.GenericClassifierApproach.html
deleted file mode 100644
index 088f52f995..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.GenericClassifierApproach.html
+++ /dev/null
@@ -1,1286 +0,0 @@
sparknlp_jsl.annotator.GenericClassifierApproach — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.GenericClassifierApproach
-
-
-class sparknlp_jsl.annotator. GenericClassifierApproach ( classname = 'com.johnsnowlabs.nlp.annotators.generic_classifier.GenericClassifierApproach' ) [source]
-Bases: sparknlp.common.AnnotatorApproach
-Trains a TensorFlow model for generic classification of feature vectors. It takes FEATURE_VECTOR annotations from
-FeaturesAssembler as input, classifies them and outputs CATEGORY annotations.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-FEATURE_VECTOR
-CATEGORY
-
-
-
-
-Parameters
-
-labelColumn Column with one label per document
-
-batchSize Size for each batch in the optimization process
-
-epochsN Number of epochs for the optimization process
-
-learningRate Learning rate for the optimization process
-
-dropout Dropout at the output of each layer
-
-validationSplit Validation split - how much data to use for validation
-
-modelFile File name to load the model from
-
-fixImbalance A flag indicating whether to balance the training set
-
-featureScaling Feature scaling method. Possible values are 'zscore', 'minmax' or empty (no scaling)
-
-outputLogsPath Path to folder where logs will be saved. If no path is specified, no logs are generated
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> features_asm = FeaturesAssembler () \
-... . setInputCols ([ "feature_1" , "feature_2" , "..." , "feature_n" ]) \
-... . setOutputCol ( "features" )
-...
->>> gen_clf = GenericClassifierApproach () \
-... . setLabelColumn ( "target" ) \
-... . setInputCols ([ "features" ]) \
-... . setOutputCol ( "prediction" ) \
-... . setModelFile ( "/path/to/graph_file.pb" ) \
-... . setEpochsNumber ( 50 ) \
-... . setBatchSize ( 100 ) \
-... . setFeatureScaling ( "zscore" ) \
-... . setLearningRate ( 0.001 ) \
-... . setFixImbalance ( True ) \
-... . setOutputLogsPath ( "logs" ) \
-... . setValidationSplit ( 0.2 ) # keep 20% of the data for validation purposes
-...
->>> pipeline = Pipeline () . setStages ([
-... features_asm ,
-... gen_clf
-...])
-...
->>> clf_model = pipeline . fit ( data )
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname])
-
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-fit
(dataset[, params])
-Fits a model to the input dataset with optional parameters.
-
-fitMultiple
(dataset, paramMaps)
-Fits a model to the input dataset for each param map in paramMaps .
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setBatchSize
(size)
-Sets the size for each batch in the optimization process
-
-setDropout
(dropout)
-Sets dropout
-
-setEpochsNumber
(epochs)
-Sets number of epochs for the optimization process
-
-setFeatureScaling
(feature_scaling)
-Sets Feature scaling method.
-
-setFixImbalance
(fix_imbalance)
-Sets a flag indicating whether to balance the training set.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLabelCol
(label_column)
-Sets the column with one label per document
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setLearningRate
(lamda)
-Sets learning rate for the optimization process
-
-setModelFile
(mode_file)
-Sets the file name to load the model from
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setOutputLogsPath
(output_logs_path)
-Sets path to folder where logs will be saved.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setValidationSplit
(validation_split)
-Sets validation split - how much data to use for validation
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-batchSize
-
-
-dropout
-
-
-epochsN
-
-
-featureScaling
-
-
-fixImbalance
-
-
-getter_attrs
-
-
-inputCols
-
-
-labelColumn
-
-
-lazyAnnotator
-
-
-learningRate
-
-
-modelFile
-
-
-outputCol
-
-
-outputLogsPath
-
-
-params
-Returns all params ordered by name.
-
-validationSplit
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-fit ( dataset , params = None )
-Fits a model to the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-params dict or list or tuple, optional an optional param map that overrides embedded params. If a list/tuple of
-param maps is given, this calls fit on each param map and returns a list of
-models.
-
-
-
-Returns
-
-Transformer
or a list of Transformer
fitted model(s)
-
-
-
-
-
-
-
-
-fitMultiple ( dataset , paramMaps )
-Fits a model to the input dataset for each param map in paramMaps .
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-paramMaps collections.abc.Sequence
A Sequence of param maps.
-
-
-
-Returns
-
-_FitMultipleIterator
A thread safe iterable which contains one model for each param map. Each
-call to next(modelIterator) will return (index, model) where model was fit
-using paramMaps[index] . index values may not be sequential.
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBatchSize ( size ) [source]
-Sets the size for each batch in the optimization process
-
-Parameters
-
-size int Size for each batch in the optimization process
-
-
-
-
-
-
-
-
-setDropout ( dropout ) [source]
-Sets the dropout at the output of each layer
-
-Parameters
-
-dropout float Dropout at the output of each layer
-
-
-
-
-
-
-
-
-setEpochsNumber ( epochs ) [source]
-Sets number of epochs for the optimization process
-
-Parameters
-
-epochs int Number of epochs for the optimization process
-
-
-
-
-
-
-
-
-setFeatureScaling ( feature_scaling ) [source]
-Sets the feature scaling method. Possible values are ‘zscore’, ‘minmax’ or empty (no scaling).
-
-Parameters
-
-feature_scaling str Feature scaling method. Possible values are ‘zscore’, ‘minmax’ or empty (no scaling).
-
-
-
-
-
-
-
-
-setFixImbalance ( fix_imbalance ) [source]
-Sets a flag indicating whether to balance the training set.
-
-Parameters
-
-fix_imbalance bool A flag indicating whether to balance the training set.
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLabelCol ( label_column ) [source]
-Sets the column with the label value we are trying to predict.
-
-Parameters
-
-label_column str Column with the label value we are trying to predict.
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setLearningRate ( lamda ) [source]
-Sets learning rate for the optimization process
-
-Parameters
-
-lamda float Learning rate for the optimization process
-
-
-
-
-
-
-
-
-setModelFile ( mode_file ) [source]
-Sets the file name to load the model from.
-
-Parameters
-
-mode_file str File name to load the model from.
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setOutputLogsPath ( output_logs_path ) [source]
-Sets path to folder where logs will be saved. If no path is specified, no logs are generated
-
-Parameters
-
-output_logs_path str Path to folder where logs will be saved. If no path is specified, no logs are generated.
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setValidationSplit ( validation_split ) [source]
-Sets the validation split - how much data to use for validation
-
-Parameters
-
-validation_split float Validation split - how much data to use for validation
-
-
-
-
-
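-As a minimal sketch (assuming this page documents the GenericClassifierApproach trainer from sparknlp_jsl; the column names and values are illustrative only), the setters above combine as:
->>> gen_clf = GenericClassifierApproach () \
-... . setInputCols ([ "features" ]) \
-... . setOutputCol ( "prediction" ) \
-... . setLabelCol ( "label" ) \
-... . setEpochsNumber ( 10 ) \
-... . setBatchSize ( 32 ) \
-... . setLearningRate ( 0.001 ) \
-... . setDropout ( 0.1 ) \
-... . setFixImbalance ( True ) \
-... . setValidationSplit ( 0.2 )
-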
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.GenericClassifierModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.GenericClassifierModel.html
deleted file mode 100644
index df8e8916d1..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.GenericClassifierModel.html
+++ /dev/null
@@ -1,1033 +0,0 @@
-
sparknlp_jsl.annotator.GenericClassifierModel — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.GenericClassifierModel
-
-
-class sparknlp_jsl.annotator. GenericClassifierModel ( classname = 'com.johnsnowlabs.nlp.annotators.generic_classifier.GenericClassifierModel' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
-Generic classifier of feature vectors. It takes FEATURE_VECTOR annotations from
-FeaturesAssembler as input, classifies them, and outputs CATEGORY annotations.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-FEATURE_VECTOR
-CATEGORY
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> features_asm = FeaturesAssembler () \
-... . setInputCols ([ "feature_1" , "feature_2" , "..." , "feature_n" ]) \
-... . setOutputCol ( "features" )
-...
->>> gen_clf = GenericClassifierModel . pretrained () \
-... . setInputCols ([ "features" ]) \
-... . setOutputCol ( "prediction" )
-...
->>> pipeline = Pipeline () . setStages ([
-... features_asm ,
-... gen_clf
-...])
-...
->>> clf_model = pipeline . fit ( data )
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-pretrained
(name[, lang, remote_loc])
-Downloads and loads a pretrained model.
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-classes
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
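-Continuing the example above (a hedged sketch; data stands for any DataFrame containing the assembled feature columns), the fitted pipeline applies the classifier via transform:
->>> predictions = clf_model . transform ( data )
->>> predictions . select ( "prediction.result" ) . show ( truncate = False )
-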
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.IOBTagger.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.IOBTagger.html
deleted file mode 100644
index 1478af6feb..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.IOBTagger.html
+++ /dev/null
@@ -1,1063 +0,0 @@
-
sparknlp_jsl.annotator.IOBTagger — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.IOBTagger
-
-
-class sparknlp_jsl.annotator. IOBTagger ( classname = 'com.johnsnowlabs.nlp.annotators.ner.IOBTagger' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
-Merges token tags and NER labels from chunks in the specified format.
-For example, the output columns of NerConverter and Tokenizer can be used as inputs.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-TOKEN, CHUNK
-NAMED_ENTITY
-
-
-
-
-Parameters
-
-scheme Format of tags, either IOB or BIOES
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> data = spark . createDataFrame ([[ "A 63-year-old man presents to the hospital ..." ]]) . toDF ( "text" )
->>> documentAssembler = DocumentAssembler () . setInputCol ( "text" ) . setOutputCol ( "document" )
->>> sentenceDetector = SentenceDetector () . setInputCols ([ "document" ]) . setOutputCol ( "sentence" )
->>> tokenizer = Tokenizer () . setInputCols ([ "sentence" ]) . setOutputCol ( "token" )
->>> embeddings = WordEmbeddingsModel . pretrained ( "embeddings_clinical" , "en" , "clinical/models" ) . setOutputCol ( "embs" )
->>> nerModel = MedicalNerModel . pretrained ( "ner_jsl" , "en" , "clinical/models" ) . setInputCols ([ "sentence" , "token" , "embs" ]) . setOutputCol ( "ner" )
->>> nerConverter = NerConverter () . setInputCols ([ "sentence" , "token" , "ner" ]) . setOutputCol ( "ner_chunk" )
-...
->>> iobTagger = IOBTagger () . setInputCols ([ "token" , "ner_chunk" ]) . setOutputCol ( "ner_label" )
->>> pipeline = Pipeline ( stages = [ documentAssembler , sentenceDetector , tokenizer , embeddings , nerModel , nerConverter , iobTagger ])
-...
->>> result = pipeline . fit ( data ) . transform ( data )
->>> result . selectExpr ( "explode(ner_label) as a" ) \
-... . selectExpr ( "a.begin" , "a.end" , "a.result as chunk" , "a.metadata.word as word" ) \
-... . where ( "chunk!='O'" ) . show ( 5 , False )
-+-----+---+-----------+-----------+
-|begin|end|chunk |word |
-+-----+---+-----------+-----------+
-|5 |15 |B-Age |63-year-old|
-|17 |19 |B-Gender |man |
-|64 |72 |B-Modifier |recurrent |
-|98 |107|B-Diagnosis|cellulitis |
-|110 |119|B-Diagnosis|pneumonias |
-+-----+---+-----------+-----------+
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-setScheme
(f)
-Sets format of tags, either IOB or BIOES
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-scheme
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setScheme ( f ) [source]
-Sets format of tags, either IOB or BIOES
-
-Parameters
-
-f str Format of tags, either IOB or BIOES
-
-
-
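-For instance, switching to BIOES tags is a one-setter change (a sketch reusing the column names from the example above):
->>> iobTagger = IOBTagger () \
-... . setInputCols ([ "token" , "ner_chunk" ]) \
-... . setOutputCol ( "ner_label" ) \
-... . setScheme ( "BIOES" )
-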
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalBertForSequenceClassification.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalBertForSequenceClassification.html
deleted file mode 100644
index a5364a3b21..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalBertForSequenceClassification.html
+++ /dev/null
@@ -1,1264 +0,0 @@
-
sparknlp_jsl.annotator.MedicalBertForSequenceClassification — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.MedicalBertForSequenceClassification
-
-
-class sparknlp_jsl.annotator. MedicalBertForSequenceClassification ( classname = 'com.johnsnowlabs.nlp.annotators.classification.MedicalBertForSequenceClassification' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
, sparknlp.common.HasCaseSensitiveProperties
, sparknlp.common.HasBatchedAnnotate
-MedicalBertForSequenceClassification can load Bert Models with a sequence classification/regression head on top
-(a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.
-Pretrained models can be loaded with pretrained()
of the companion
-object:
->>> sequenceClassifier = MedicalBertForSequenceClassification . pretrained () \
-... . setInputCols ([ "token" , "document" ]) \
-... . setOutputCol ( "label" )
-For available pretrained models please see the Models Hub .
-Models from the HuggingFace 🤗 Transformers library are also compatible with
-Spark NLP 🚀. To see which models are compatible and how to import them see
-Import Transformers into Spark NLP 🚀 .
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, TOKEN
-CATEGORY
-
-
-
-
-Parameters
-
-batchSize Batch size. Large values allow faster processing but require more
-memory, by default 8
-
-caseSensitive Whether to ignore case in tokens for embeddings matching, by default
-True
-
-configProtoBytes ConfigProto from tensorflow, serialized into byte array.
-
-maxSentenceLength Max sentence length to process, by default 128
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.annotator import *
->>> from pyspark.ml import Pipeline
->>> documentAssembler = DocumentAssembler () \
-... . setInputCol ( "text" ) \
-... . setOutputCol ( "document" )
->>> tokenizer = Tokenizer () \
-... . setInputCols ([ "document" ]) \
-... . setOutputCol ( "token" )
->>> tokenClassifier = MedicalBertForSequenceClassification . pretrained () \
-... . setInputCols ([ "token" , "document" ]) \
-... . setOutputCol ( "label" ) \
-... . setCaseSensitive ( True )
->>> pipeline = Pipeline () . setStages ([
-... documentAssembler ,
-... tokenizer ,
-... tokenClassifier
-... ])
->>> data = spark . createDataFrame ([[ "I felt a bit drowsy and had blurred vision after taking Aspirin." ]]) . toDF ( "text" )
->>> result = pipeline . fit ( data ) . transform ( data )
->>> result . select ( "label.result" ) . show ( truncate = False )
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getBatchSize
()
-Gets current batch size.
-
-getCaseSensitive
()
-Gets whether to ignore case in tokens for embeddings matching.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-loadSavedModel
(folder, spark_session)
-Loads a locally saved model.
-
-loadSavedModelOpenSource
(...)
-Loads a locally saved model.
-
-pretrained
([name, lang, remote_loc])
-Downloads and loads a pretrained model.
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setBatchSize
(v)
-Sets batch size.
-
-setCaseSensitive
(value)
-Sets whether to ignore case in tokens for embeddings matching.
-
-setCoalesceSentences
(value)
-Instead of 1 class per sentence (if inputCols is “sentence”), output 1 class per document by averaging the probabilities of all sentences.
-
-setConfigProtoBytes
(b)
-Sets configProto from tensorflow, serialized into byte array.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMaxSentenceLength
(value)
-Sets max sentence length to process, by default 128.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-batchSize
-
-
-caseSensitive
-
-
-coalesceSentences
-
-
-configProtoBytes
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-maxSentenceLength
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getBatchSize ( )
-Gets current batch size.
-
-Returns
-
-int Current batch size
-
-
-
-
-
-
-
-
-getCaseSensitive ( )
-Gets whether to ignore case in tokens for embeddings matching.
-
-Returns
-
-bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-static loadSavedModel ( folder , spark_session ) [source]
-Loads a locally saved model.
-
-Parameters
-
-folder str Folder of the saved model
-
-spark_session pyspark.sql.SparkSession The current SparkSession
-
-
-
-Returns
-
-MedicalBertForSequenceClassification The restored model
-
-
-
-
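-A hedged usage sketch; the folder path is hypothetical and must point to an exported model in the expected format:
->>> sequenceClassifier = MedicalBertForSequenceClassification . loadSavedModel ( "/models/bert_seq_clf" , spark ) \
-... . setInputCols ([ "token" , "document" ]) \
-... . setOutputCol ( "label" )
-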
-
-
-
-
-static loadSavedModelOpenSource ( bertForTokenClassifierPath , tfModelPath , spark_session ) [source]
-Loads a locally saved model.
-
-Parameters
-
-bertForTokenClassifierPath str Folder of the bertForTokenClassifier
-
-tfModelPath str Folder that contains the tf model
-
-spark_session pyspark.sql.SparkSession The current SparkSession
-
-Returns
-
-MedicalBertForSequenceClassification The restored model
-
-
-
-
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-static pretrained ( name = 'bert_sequence_classifier_ade' , lang = 'en' , remote_loc = 'clinical/models' ) [source]
-Downloads and loads a pretrained model.
-
-Parameters
-
-name str, optional Name of the pretrained model, by default
-“bert_sequence_classifier_ade”
-
-lang str, optional Language of the pretrained model, by default “en”
-
-remote_loc str, optional Optional remote address of the resource, by default None. Will use
-Spark NLP’s repositories otherwise.
-
-
-
-Returns
-
-MedicalBertForSequenceClassification The restored model
-
-
-
-
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBatchSize ( v )
-Sets batch size.
-
-Parameters
-
-v int Batch size
-
-
-
-
-
-
-
-
-setCaseSensitive ( value )
-Sets whether to ignore case in tokens for embeddings matching.
-
-Parameters
-
-value bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-setCoalesceSentences ( value ) [source]
-Instead of 1 class per sentence (if inputCols is “sentence”), output 1 class per document by averaging the probabilities of all sentences.
-Due to the max sequence length limit in almost all transformer models such as BERT (512 tokens), this parameter helps feed all the sentences
-into the model and average all the probabilities for the entire document instead of per sentence. (Default: true)
-
-Parameters
-
-value bool If the output of all sentences will be averaged to one output
-
-
-
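-For example (an illustrative sketch assuming an upstream SentenceDetector produces the "sentence" column), coalescing yields a single document-level label:
->>> sequenceClassifier = MedicalBertForSequenceClassification . pretrained () \
-... . setInputCols ([ "token" , "sentence" ]) \
-... . setOutputCol ( "label" ) \
-... . setCoalesceSentences ( True )
-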
-
-
-
-
-
-setConfigProtoBytes ( b ) [source]
-Sets configProto from tensorflow, serialized into byte array.
-
-Parameters
-
-b List[str] ConfigProto from tensorflow, serialized into byte array
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMaxSentenceLength ( value ) [source]
-Sets max sentence length to process, by default 128.
-
-Parameters
-
-value int Max sentence length to process
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalBertForTokenClassifier.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalBertForTokenClassifier.html
deleted file mode 100644
index 19c0723a8a..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalBertForTokenClassifier.html
+++ /dev/null
@@ -1,1256 +0,0 @@
-
sparknlp_jsl.annotator.MedicalBertForTokenClassifier — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.MedicalBertForTokenClassifier
-
-
-class sparknlp_jsl.annotator. MedicalBertForTokenClassifier ( classname = 'com.johnsnowlabs.nlp.annotators.classification.MedicalBertForTokenClassifier' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
, sparknlp.common.HasCaseSensitiveProperties
, sparknlp.common.HasBatchedAnnotate
-MedicalBertForTokenClassifier can load Bert Models with a token
-classification head on top (a linear layer on top of the hidden-states
-output) e.g. for Named-Entity-Recognition (NER) tasks.
-Pretrained models can be loaded with pretrained()
of the companion
-object:
->>> embeddings = MedicalBertForTokenClassifier . pretrained () \
-... . setInputCols ([ "token" , "document" ]) \
-... . setOutputCol ( "label" )
-
-
-The default model is "bert_token_classifier_ner_bionlp"
, if no name is
-provided.
-For available pretrained models please see the Models Hub .
-Models from the HuggingFace 🤗 Transformers library are also compatible with
-Spark NLP 🚀. To see which models are compatible and how to import them see
-Import Transformers into Spark NLP 🚀 .
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, TOKEN
-NAMED_ENTITY
-
-
-
-
-Parameters
-
-batchSize Batch size. Large values allow faster processing but require more
-memory, by default 8
-
-caseSensitive Whether to ignore case in tokens for embeddings matching, by default
-True
-
-configProtoBytes ConfigProto from tensorflow, serialized into byte array.
-
-maxSentenceLength Max sentence length to process, by default 128
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.annotator import *
->>> from pyspark.ml import Pipeline
->>> documentAssembler = DocumentAssembler () \
-... . setInputCol ( "text" ) \
-... . setOutputCol ( "document" )
->>> tokenizer = Tokenizer () \
-... . setInputCols ([ "document" ]) \
-... . setOutputCol ( "token" )
->>> tokenClassifier = MedicalBertForTokenClassifier . pretrained () \
-... . setInputCols ([ "token" , "document" ]) \
-... . setOutputCol ( "label" ) \
-... . setCaseSensitive ( True )
->>> pipeline = Pipeline () . setStages ([
-... documentAssembler ,
-... tokenizer ,
-... tokenClassifier
-... ])
->>> data = spark . createDataFrame ([[ "Both the erbA IRES and the erbA/myb virus constructs transformed erythroid cells after infection of bone marrow or blastoderm cultures." ]]) . toDF ( "text" )
->>> result = pipeline . fit ( data ) . transform ( data )
->>> result . select ( "label.result" ) . show ( truncate = False )
-+------------------------------------------------------------------------------------+
-|result                                                                              |
-+------------------------------------------------------------------------------------+
-|[O, O, B-Organism, I-Organism, O, O, B-Organism, I-Organism, O, O, B-Cell, I-Cell, O, O, O, B-Multi-tissue_structure, I-Multi-tissue_structure, O, B-Cell, I-Cell, O]|
-+------------------------------------------------------------------------------------+
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getBatchSize
()
-Gets current batch size.
-
-getCaseSensitive
()
-Gets whether to ignore case in tokens for embeddings matching.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-loadSavedModel
(folder, spark_session)
-Loads a locally saved model.
-
-loadSavedModelOpenSource
(...)
-Loads a locally saved model.
-
-pretrained
([name, lang, remote_loc])
-Downloads and loads a pretrained model.
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setBatchSize
(v)
-Sets batch size.
-
-setCaseSensitive
(value)
-Sets whether to ignore case in tokens for embeddings matching.
-
-setConfigProtoBytes
(b)
-Sets configProto from tensorflow, serialized into byte array.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMaxSentenceLength
(value)
-Sets max sentence length to process, by default 128.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-batchSize
-
-
-caseSensitive
-
-
-configProtoBytes
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-maxSentenceLength
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getBatchSize ( )
-Gets current batch size.
-
-Returns
-
-int Current batch size
-
-
-
-
-
-
-
-
-getCaseSensitive ( )
-Gets whether to ignore case in tokens for embeddings matching.
-
-Returns
-
-bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-static loadSavedModel ( folder , spark_session ) [source]
-Loads a locally saved model.
-
-Parameters
-
-folder str Folder of the saved model
-
-spark_session pyspark.sql.SparkSession The current SparkSession
-
-
-
-Returns
-
-MedicalBertForTokenClassifier The restored model
-
-
-
-
-
-
-
-
-static loadSavedModelOpenSource ( bertForTokenClassifierPath , tfModelPath , spark_session ) [source]
-Loads a locally saved model.
-
-Parameters
-
-bertForTokenClassifierPath str Folder of the bertForTokenClassifier
-
-tfModelPath str Folder that contains the tf model
-
-spark_session pyspark.sql.SparkSession The current SparkSession
-
-Returns
-
-MedicalBertForTokenClassifier The restored model
-
-
-
-
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-static pretrained ( name = 'bert_token_classifier_ner_bionlp' , lang = 'en' , remote_loc = 'clinical/models' ) [source]
-Downloads and loads a pretrained model.
-
-Parameters
-
-name str, optional Name of the pretrained model, by default
-“bert_token_classifier_ner_bionlp”
-
-lang str, optional Language of the pretrained model, by default “en”
-
-remote_loc str, optional Optional remote address of the resource, by default None. Will use
-Spark NLP’s repositories otherwise.
-
-
-
-Returns
-
-MedicalBertForTokenClassifier The restored model
-
-
-
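-The defaults can also be spelled out explicitly (using the identifiers documented above):
->>> tokenClassifier = MedicalBertForTokenClassifier . pretrained ( "bert_token_classifier_ner_bionlp" , "en" , "clinical/models" )
-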
-
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBatchSize ( v )
-Sets batch size.
-
-Parameters
-
-v int Batch size
-
-
-
-
-
-
-
-
-setCaseSensitive ( value )
-Sets whether to ignore case in tokens for embeddings matching.
-
-Parameters
-
-value bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-setConfigProtoBytes ( b ) [source]
-Sets configProto from tensorflow, serialized into byte array.
-
-Parameters
-
-b List[str] ConfigProto from tensorflow, serialized into byte array
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMaxSentenceLength ( value ) [source]
-Sets max sentence length to process, by default 128.
-
-Parameters
-
-value int Max sentence length to process
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalDistilBertForSequenceClassification.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalDistilBertForSequenceClassification.html
deleted file mode 100644
index f879904e5d..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalDistilBertForSequenceClassification.html
+++ /dev/null
@@ -1,1278 +0,0 @@
-
sparknlp_jsl.annotator.MedicalDistilBertForSequenceClassification — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.MedicalDistilBertForSequenceClassification
-
-
-class sparknlp_jsl.annotator. MedicalDistilBertForSequenceClassification ( classname = 'com.johnsnowlabs.nlp.annotators.classification.MedicalDistilBertForSequenceClassification' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
, sparknlp.common.HasCaseSensitiveProperties
, sparknlp.common.HasBatchedAnnotate
-MedicalDistilBertForSequenceClassification can load DistilBERT Models with sequence classification/regression head on
-top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.
-Pretrained models can be loaded with pretrained()
of the companion
-object:
->>> sequenceClassifier = MedicalDistilBertForSequenceClassification . pretrained () \
-... . setInputCols ([ "token" , "document" ]) \
-... . setOutputCol ( "label" )
-
-
-Models from the HuggingFace 🤗 Transformers library are also compatible with
-Spark NLP 🚀. To see which models are compatible and how to import them see
-Import Transformers into Spark NLP 🚀 .
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, TOKEN
-CATEGORY
-
-
-
-
-Parameters
-
-batchSize Batch size. Large values allow faster processing but require more
-memory, by default 8
-
-caseSensitive Whether to ignore case in tokens for embeddings matching, by default
-True
-
-configProtoBytes ConfigProto from tensorflow, serialized into byte array.
-
-maxSentenceLength Max sentence length to process, by default 128
-
-coalesceSentences Instead of 1 class per sentence (if inputCols is sentence) output 1 class per document by averaging
-probabilities in all sentences.
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.annotator import *
->>> from pyspark.ml import Pipeline
->>> documentAssembler = DocumentAssembler () \
-... . setInputCol ( "text" ) \
-... . setOutputCol ( "document" )
->>> tokenizer = Tokenizer () \
-... . setInputCols ([ "document" ]) \
-... . setOutputCol ( "token" )
->>> sequenceClassifier = MedicalDistilBertForSequenceClassification . pretrained () \
-... . setInputCols ([ "token" , "document" ]) \
-... . setOutputCol ( "label" ) \
-... . setCaseSensitive ( True )
->>> pipeline = Pipeline () . setStages ([
-... documentAssembler ,
-... tokenizer ,
-... sequenceClassifier
-... ])
->>> data = spark . createDataFrame ([[ "I felt a bit drowsy and had blurred vision after taking Aspirin." ]]) . toDF ( "text" )
->>> result = pipeline . fit ( data ) . transform ( data )
->>> result . select ( "label.result" ) . show ( truncate = False )
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getBatchSize
()
-Gets current batch size.
-
-getCaseSensitive
()
-Gets whether to ignore case in tokens for embeddings matching.
-
-getClasses
()
-Returns labels used to train this model
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-loadSavedModel
(folder, spark_session)
-Loads a locally saved model.
-
-loadSavedModelOpenSource
(...)
-Loads a locally saved model.
-
-pretrained
([name, lang, remote_loc])
-Downloads and loads a pretrained model.
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setBatchSize
(v)
-Sets batch size.
-
-setCaseSensitive
(value)
-Sets whether to ignore case in tokens for embeddings matching.
-
-setCoalesceSentences
(value)
-Instead of 1 class per sentence (if inputCols is “sentence”), output 1 class per document by averaging the probabilities of all sentences.
-
-setConfigProtoBytes
(b)
-Sets configProto from tensorflow, serialized into byte array.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMaxSentenceLength
(value)
-Sets max sentence length to process, by default 128.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-batchSize
-
-
-caseSensitive
-
-
-coalesceSentences
-
-
-configProtoBytes
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-maxSentenceLength
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then make a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getBatchSize ( )
-Gets current batch size.
-
-Returns
-
-int Current batch size
-
-
-
-
-
-
-
-
-getCaseSensitive ( )
-Gets whether to ignore case in tokens for embeddings matching.
-
-Returns
-
-bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-getClasses ( ) [source]
-Returns labels used to train this model
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-static loadSavedModel ( folder , spark_session ) [source]
-Loads a locally saved model.
-
-Parameters
-
-folder str Folder of the saved model
-
-spark_session pyspark.sql.SparkSession The current SparkSession
-
-
-
-Returns
-
-DistilBertForSequenceClassification The restored model
-
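-
-For example, a minimal sketch (the folder path is a placeholder and the
-class name is taken from the Returns type above):
->>> sequenceClassifier = DistilBertForSequenceClassification.loadSavedModel(
-...     "/path/to/exported_model",
-...     spark
-... )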
-
-
-
-
-
-
-
-static loadSavedModelOpenSource ( destilBertForTokenClassifierPath , tfModelPath , spark_session ) [source]
-Loads a locally saved model.
-
-Parameters
-
-destilBertForTokenClassifierPath str Folder of the saved token classifier model
-
-tfModelPath str Folder that contains the tf model
-
-spark_session pyspark.sql.SparkSession The current SparkSession
-
-Returns
-
-MedicalBertForSequenceClassification The restored model
-
-
-
-
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-static pretrained ( name = 'distilbert_sequence_classifier_ade' , lang = 'en' , remote_loc = 'clinical/models' ) [source]
-Downloads and loads a pretrained model.
-
-Parameters
-
-name str, optional Name of the pretrained model, by default "distilbert_sequence_classifier_ade"
-
-lang str, optional Language of the pretrained model, by default “en”
-
-remote_loc str, optional Optional remote address of the resource, by default
-"clinical/models". Will use Spark NLP's repositories otherwise.
-
-
-
-Returns
-
-MedicalBertForSequenceClassification The restored model
-
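-
-A usage sketch with the defaults shown above (the class name is taken from
-the Returns type above; the input columns are an assumption based on the
-typical document/token inputs of this annotator):
->>> sequenceClassifier = MedicalBertForSequenceClassification.pretrained(
-...     "distilbert_sequence_classifier_ade", "en", "clinical/models"
-... ) \
-...     .setInputCols(["document", "token"]) \
-...     .setOutputCol("class")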
-
-
-
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBatchSize ( v )
-Sets batch size.
-
-Parameters
-
-v int Batch size
-
-
-
-
-
-
-
-
-setCaseSensitive ( value )
-Sets whether to ignore case in tokens for embeddings matching.
-
-Parameters
-
-value bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-setCoalesceSentences ( value ) [source]
-Instead of one class per sentence (if inputCols is "sentence"), outputs one class per document by averaging the probabilities of all sentences.
-Due to the max sequence length limit in almost all transformer models such as BERT (512 tokens), this parameter helps feed all the sentences
-into the model and average all the probabilities for the entire document instead of computing probabilities per sentence. (Default: true)
-
-Parameters
-
-value bool If the output of all sentences will be averaged to one output
-
-
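-For example, for documents longer than the max sentence length, a single
-document-level class can be produced by averaging the sentence-level
-probabilities (a sketch; classifier is assumed to be an instance of this
-annotator):
->>> classifier = classifier.setCoalesceSentences(True)
-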
-
-
-
-
-
-
-setConfigProtoBytes ( b ) [source]
-Sets configProto from tensorflow, serialized into byte array.
-
-Parameters
-
-b List[int] ConfigProto from tensorflow, serialized into byte array
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMaxSentenceLength ( value ) [source]
-Sets max sentence length to process, by default 128.
-
-Parameters
-
-value int Max sentence length to process
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalNerApproach.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalNerApproach.html
deleted file mode 100644
index ea25f76581..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalNerApproach.html
+++ /dev/null
@@ -1,1764 +0,0 @@
-
-sparknlp_jsl.annotator.MedicalNerApproach — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.MedicalNerApproach
-
-
-class sparknlp_jsl.annotator. MedicalNerApproach [source]
-Bases: sparknlp.common.AnnotatorApproach
, sparknlp.annotator.NerApproach
-This Named Entity Recognition annotator allows training a generic NER model
-based on neural networks. The architecture of the neural network is a
-Char CNNs - BiLSTM - CRF that achieves state-of-the-art results on most
-datasets.
-For instantiated/pretrained models, see NerDLModel .
-The training data should be a labeled Spark Dataset in the format of
-CoNLL 2003 IOB with Annotation type columns. The data should have columns
-of type DOCUMENT, TOKEN, WORD_EMBEDDINGS and an additional label column of
-annotator type NAMED_ENTITY .
-Excluding the label, these prerequisite columns can be produced with the
-pipeline shown in the Examples section below.
-
-For extended examples of usage, see the Spark NLP Workshop .
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, TOKEN, WORD_EMBEDDINGS
-NAMED_ENTITY
-
-
-
-
-Parameters
-
-labelColumn Column with label per each token
-
-entities Entities to recognize
-
-minEpochs Minimum number of epochs to train, by default 0
-
-maxEpochs Maximum number of epochs to train, by default 50
-
-verbose Level of verbosity during training, by default 2
-
-randomSeed Random seed
-
-lr Learning Rate, by default 0.001
-
-po Learning rate decay coefficient. Real Learning Rate = lr / (1 + po *
-epoch), by default 0.005
-
-batchSize Batch size, by default 8
-
-dropout Dropout coefficient, by default 0.5
-
-graphFolder Folder path that contain external graph files
-
-configProtoBytes ConfigProto from tensorflow, serialized into byte array.
-
-useContrib whether to use contrib LSTM Cells. Not compatible with Windows. Might
-slightly improve accuracy
-
-validationSplit Choose the proportion of training dataset to be validated against the
-model on each Epoch. The value should be between 0.0 and 1.0 and by
-default it is 0.0 and off, by default 0.0
-
-evaluationLogExtended Whether logs for validation to be extended, by default False.
-
-testDataset Path to test dataset. If set used to calculate statistic on it during
-training.
-
-includeConfidence whether to include confidence scores in annotation metadata, by default
-False
-
-includeAllConfidenceScores whether to include all confidence scores in annotation metadata or just
-the score of the predicted tag, by default False
-
-enableOutputLogs Whether to use stdout in addition to Spark logs, by default False
-
-outputLogsPath Folder path to save training logs
-
-enableMemoryOptimizer Whether to optimize for large datasets or not. Enabling this option can
-slow down training, by default False
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
-
-
-First extract the prerequisites for the NerDLApproach
->>> documentAssembler = DocumentAssembler () \
-... . setInputCol ( "text" ) \
-... . setOutputCol ( "document" )
->>> sentence = SentenceDetector () \
-... . setInputCols ([ "document" ]) \
-... . setOutputCol ( "sentence" )
->>> tokenizer = Tokenizer () \
-... . setInputCols ([ "sentence" ]) \
-... . setOutputCol ( "token" )
->>> embeddings = BertEmbeddings . pretrained () \
-... . setInputCols ([ "sentence" , "token" ]) \
-... . setOutputCol ( "embeddings" )
-
-
-Then the training can start
->>> nerTagger = MedicalNerApproach () \
-... . setInputCols ([ "sentence" , "token" , "embeddings" ]) \
-... . setLabelColumn ( "label" ) \
-... . setOutputCol ( "ner" ) \
-... . setMaxEpochs ( 1 ) \
-... . setRandomSeed ( 0 ) \
-... . setVerbose ( 0 )
->>> pipeline = Pipeline () . setStages ([
-... documentAssembler ,
-... sentence ,
-... tokenizer ,
-... embeddings ,
-... nerTagger
-... ])
->>> conll = CoNLL ()
->>> trainingData = conll . readDataset ( spark , "src/test/resources/conll2003/eng.train" )
->>> pipelineModel = pipeline . fit ( trainingData )
-
-
-Methods
-
-
-
-
-
-
-__init__
()
-
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-fit
(dataset[, params])
-Fits a model to the input dataset with optional parameters.
-
-fitMultiple
(dataset, paramMaps)
-Fits a model to the input dataset for each param map in paramMaps .
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLabelColumn
()
-Gets column for label per each token.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setBatchSize
(v)
-Sets batch size, by default 64.
-
-setConfigProtoBytes
(b)
-Sets configProto from tensorflow, serialized into byte array.
-
-setDropout
(v)
-Sets dropout coefficient, by default 0.5.
-
-setEarlyStoppingCriterion
(criterion)
-Sets early stopping criterion.
-
-setEarlyStoppingPatience
(patience)
-Sets the number of epochs with no performance improvement before training is terminated.
-
-setEnableMemoryOptimizer
(value)
-Sets whether to optimize for large datasets or not, by default False.
-
-setEnableOutputLogs
(value)
-Sets whether to use stdout in addition to Spark logs, by default False.
-
-setEntities
(tags)
-Sets entities to recognize.
-
-setEvaluationLogExtended
(v)
-Sets whether logs for validation to be extended, by default False.
-
-setGraphFile
(ff)
-Sets path that contains the external graph file.
-
-setGraphFolder
(p)
-Sets folder path that contain external graph files.
-
-setIncludeAllConfidenceScores
(value)
-Sets whether to include all confidence scores in annotation metadata or just the score of the predicted tag, by default False.
-
-setIncludeConfidence
(value)
-Sets whether to include confidence scores in annotation metadata, by default False.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLabelColumn
(value)
-Sets name of column for data labels.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setLogPrefix
(s)
-Sets a prefix for the training logs.
-
-setLr
(v)
-Sets Learning Rate, by default 0.001.
-
-setMaxEpochs
(epochs)
-Sets maximum number of epochs to train.
-
-setMinEpochs
(epochs)
-Sets minimum number of epochs to train.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setOutputLogsPath
(p)
-Sets folder path to save training logs.
-
-setOverrideExistingTags
(value)
-Sets whether to override already learned tags when using a pretrained model to initialize the new model.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setPo
(v)
-Sets Learning rate decay coefficient, by default 0.005.
-
-setPretrainedModelPath
(value)
-Sets the path to an already trained MedicalNerModel used as a starting point for the new model.
-
-setRandomSeed
(seed)
-Sets random seed for shuffling.
-
-setTagsMapping
(value)
-Sets a map specifying how old tags are mapped to new ones.
-
-setTestDataset
(path[, read_as, options])
-Sets Path to test dataset.
-
-setUseBestModel
(value)
-Sets whether to restore and use the model that has achieved the best performance at the end of the training.
-
-setUseContrib
(v)
-Sets whether to use contrib LSTM Cells.
-
-setValidationSplit
(v)
-Sets the proportion of training dataset to be validated against the model on each Epoch, by default it is 0.0 and off.
-
-setVerbose
(verboseValue)
-Sets level of verbosity during training.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-batchSize
-
-
-configProtoBytes
-
-
-dropout
-
-
-earlyStoppingCriterion
-
-
-earlyStoppingPatience
-
-
-enableMemoryOptimizer
-
-
-enableOutputLogs
-
-
-entities
-
-
-evaluationLogExtended
-
-
-getter_attrs
-
-
-graphFile
-
-
-graphFolder
-
-
-includeAllConfidenceScores
-
-
-includeConfidence
-
-
-inputCols
-
-
-labelColumn
-
-
-lazyAnnotator
-
-
-logPrefix
-
-
-lr
-
-
-maxEpochs
-
-
-minEpochs
-
-
-outputCol
-
-
-outputLogsPath
-
-
-overrideExistingTags
-
-
-params
-Returns all params ordered by name.
-
-po
-
-
-pretrainedModelPath
-
-
-randomSeed
-
-
-tagsMapping
-
-
-testDataset
-
-
-useBestModel
-
-
-useContrib
-
-
-validationSplit
-
-
-verbose
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-fit ( dataset , params = None )
-Fits a model to the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-params dict or list or tuple, optional an optional param map that overrides embedded params. If a list/tuple of
-param maps is given, this calls fit on each param map and returns a list of
-models.
-
-
-
-Returns
-
-Transformer
or a list of Transformer
fitted model(s)
-
-
-
-
-
-
-
-
-fitMultiple ( dataset , paramMaps )
-Fits a model to the input dataset for each param map in paramMaps .
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-paramMaps collections.abc.Sequence
A Sequence of param maps.
-
-
-
-Returns
-
-_FitMultipleIterator
A thread safe iterable which contains one model for each param map. Each
-call to next(modelIterator) will return (index, model) where model was fit
-using paramMaps[index] . index values may not be sequential.
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLabelColumn ( )
-Gets column for label per each token.
-
-Returns
-
-str Column with label per each token
-
-
-
-
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBatchSize ( v ) [source]
-Sets batch size, by default 64.
-
-Parameters
-
-v int Batch size
-
-
-
-
-
-
-
-
-setConfigProtoBytes ( b ) [source]
-Sets configProto from tensorflow, serialized into byte array.
-
-Parameters
-
-b List[int] ConfigProto from tensorflow, serialized into byte array
-
-
-
-
-
-
-
-
-setDropout ( v ) [source]
-Sets dropout coefficient, by default 0.5.
-
-Parameters
-
-v float Dropout coefficient
-
-
-
-
-
-
-
-
-setEarlyStoppingCriterion ( criterion ) [source]
-Sets early stopping criterion. A value 0 means no early stopping.
-
-Parameters
-
-criterion float Early stopping criterion.
-
-
-
-
-
-
-
-
-setEarlyStoppingPatience ( patience ) [source]
-Sets the number of epochs with no performance improvement before training is terminated.
-
-Parameters
-
-patience int Early stopping patience.
-
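-
-Used together with setEarlyStoppingCriterion, this stops training once the
-monitored metric stops improving (a sketch with arbitrary values):
->>> nerTagger = nerTagger \
-...     .setEarlyStoppingCriterion(0.01) \
-...     .setEarlyStoppingPatience(3)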
-
-
-
-
-
-
-
-setEnableMemoryOptimizer ( value ) [source]
-Sets whether to optimize for large datasets or not, by default False.
-Enabling this option can slow down training.
-
-Parameters
-
-value bool Whether to optimize for large datasets
-
-
-
-
-
-
-
-
-setEnableOutputLogs ( value ) [source]
-Sets whether to use stdout in addition to Spark logs, by default
-False.
-
-Parameters
-
-value bool Whether to use stdout in addition to Spark logs
-
-
-
-
-
-
-
-
-setEntities ( tags )
-Sets entities to recognize.
-
-Parameters
-
-tags List[str] List of entities
-
-
-
-
-
-
-
-
-setEvaluationLogExtended ( v ) [source]
-Sets whether logs for validation to be extended, by default False.
-Displays time and evaluation of each label.
-
-Parameters
-
-v bool Whether logs for validation to be extended
-
-
-
-
-
-
-
-
-setGraphFile ( ff ) [source]
-Sets path that contains the external graph file. When specified, the provided file will be used, and no graph search will happen.
-
-Parameters
-
-ff str Path that contains the external graph file. When specified, the provided file will be used, and no graph search will happen.
-
-
-
-
-
-
-
-
-setGraphFolder ( p ) [source]
-Sets folder path that contain external graph files.
-
-Parameters
-
-p str Folder path that contain external graph files
-
-
-
-
-
-
-
-
-setIncludeAllConfidenceScores ( value ) [source]
-Sets whether to include all confidence scores in annotation metadata
-or just the score of the predicted tag, by default False.
-
-Parameters
-
-value bool Whether to include all confidence scores in annotation metadata or
-just the score of the predicted tag
-
-
-
-
-
-
-
-
-setIncludeConfidence ( value ) [source]
-Sets whether to include confidence scores in annotation metadata, by
-default False.
-
-Parameters
-
-value bool Whether to include the confidence value in the output.
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLabelColumn ( value )
-Sets name of column for data labels.
-
-Parameters
-
-value str Column for data labels
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setLogPrefix ( s ) [source]
-Sets a prefix for the training logs.
-
-Parameters
-
-s str Prefix for the training logs
-
-
-
-
-
-
-
-
-setLr ( v ) [source]
-Sets Learning Rate, by default 0.001.
-
-Parameters
-
-v float Learning Rate
-
-
-
-
-
-
-
-
-setMaxEpochs ( epochs )
-Sets maximum number of epochs to train.
-
-Parameters
-
-epochs int Maximum number of epochs to train
-
-
-
-
-
-
-
-
-setMinEpochs ( epochs )
-Sets minimum number of epochs to train.
-
-Parameters
-
-epochs int Minimum number of epochs to train
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setOutputLogsPath ( p ) [source]
-Sets folder path to save training logs.
-
-Parameters
-
-p str Folder path to save training logs
-
-
-
-
-
-
-
-
-setOverrideExistingTags ( value ) [source]
-Sets whether to override already learned tags when using a pretrained model to initialize the new model, by default True.
-
-Parameters
-
-value bool Whether to override already learned tags when using a pretrained model to initialize the new model
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setPo ( v ) [source]
-Sets Learning rate decay coefficient, by default 0.005.
-Real Learning Rate is lr / (1 + po * epoch).
-
-Parameters
-
-v float Learning rate decay coefficient
-
-
-
-
-
-
-
-
-setPretrainedModelPath ( value ) [source]
-Sets the path to an already trained MedicalNerModel, which is used as a starting point for training the new model.
-
-Parameters
-
-value str Path to an already trained MedicalNerModel, which is used as a starting point for training the new model.
-
-
-
-
-
-
-
-
-setRandomSeed ( seed )
-Sets random seed for shuffling.
-
-Parameters
-
-seed int Random seed for shuffling
-
-
-
-
-
-
-
-
-setTagsMapping ( value ) [source]
-Sets a map specifying how old tags are mapped to new ones. It only takes
-effect when used together with setOverrideExistingTags.
-
-Parameters
-
-value list A map specifying how old tags are mapped to new ones
-
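-
-A transfer-learning sketch (the model path is a placeholder, and the
-"oldTag,newTag" list format for the mapping is an assumption; check the
-current API for the exact format):
->>> nerTagger = nerTagger \
-...     .setPretrainedModelPath("/path/to/pretrained_medical_ner") \
-...     .setOverrideExistingTags(False) \
-...     .setTagsMapping(["B-DISEASE,B-PROBLEM", "I-DISEASE,I-PROBLEM"])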
-
-
-
-
-
-
-
-setTestDataset ( path , read_as = 'SPARK' , options = {'format': 'parquet'} ) [source]
-Sets Path to test dataset. If set used to calculate statistic on it
-during training.
-
-Parameters
-
-path str Path to test dataset
-
-read_as str, optional How to read the resource, by default ReadAs.SPARK
-
-options dict, optional Options for reading the resource, by default {“format”: “parquet”}
-
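-
-For example, to compute evaluation statistics on a held-out set during
-training (a sketch; the parquet path is a placeholder):
->>> nerTagger = nerTagger.setTestDataset("test_data.parquet")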
-
-
-
-
-
-
-
-setUseBestModel ( value ) [source]
-Sets whether to restore and use the model that has achieved the best performance at the end of the training.
-The metric that is being monitored is macro F1, evaluated with the following precedence: the test dataset if set, otherwise the validation split if set, otherwise the training data.
-
-Parameters
-
-value bool Whether to return the model that has achieved the best metrics across epochs.
-
-
-
-
-
-
-
-
-setUseContrib ( v ) [source]
-Sets whether to use contrib LSTM Cells. Not compatible with Windows.
-Might slightly improve accuracy.
-
-Parameters
-
-v bool Whether to use contrib LSTM Cells
-
-
-
-Raises
-
-Exception Windows not supported to use contrib
-
-
-
-
-
-
-
-
-setValidationSplit ( v ) [source]
-Sets the proportion of training dataset to be validated against the
-model on each Epoch, by default it is 0.0 and off. The value should be
-between 0.0 and 1.0.
-
-Parameters
-
-v float Proportion of training dataset to be validated
-
-
-
-
-
-
-
-
-setVerbose ( verboseValue )
-Sets level of verbosity during training.
-
-Parameters
-
-verboseValue int Level of verbosity
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalNerModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalNerModel.html
deleted file mode 100644
index 7833f017ed..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.MedicalNerModel.html
+++ /dev/null
@@ -1,1251 +0,0 @@
-
-sparknlp_jsl.annotator.MedicalNerModel — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.MedicalNerModel
-
-
-class sparknlp_jsl.annotator. MedicalNerModel ( classname = 'com.johnsnowlabs.nlp.annotators.ner.MedicalNerModel' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
, sparknlp.common.HasStorageRef
, sparknlp.common.HasBatchedAnnotate
-This Named Entity Recognition annotator is a generic NER model based on
-neural networks. The neural network architecture is a Char CNNs - BiLSTM - CRF
-that achieves state-of-the-art results on most datasets.
-This is the instantiated model of the NerDLApproach
. For training
-your own model, please see the documentation of that class.
-Pretrained models can be loaded with pretrained()
of the companion
-object:
->>> nerModel = MedicalNerModel . pretrained () \
-... . setInputCols ([ "sentence" , "token" , "embeddings" ]) \
-... . setOutputCol ( "ner" )
-
-
-The default model is "ner_dl"
, if no name is provided.
-For available pretrained models please see the Models Hub .
-Additionally, pretrained pipelines are available for this module, see
-Pipelines .
-Note that some pretrained models require specific types of embeddings,
-depending on which they were trained on. For example, the default model
-"ner_dl"
requires the WordEmbeddings "glove_100d"
.
-For extended examples of usage, see the Spark NLP Workshop .
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, TOKEN, WORD_EMBEDDINGS
-NAMED_ENTITY
-
-
-
-
-Parameters
-
-batchSize Size of every batch, by default 8
-
-configProtoBytes ConfigProto from tensorflow, serialized into byte array.
-
-includeConfidence Whether to include confidence scores in annotation metadata, by default
-False
-
-includeAllConfidenceScores Whether to include all confidence scores in annotation metadata or just
-the score of the predicted tag, by default False
-
-inferenceBatchSize Number of sentences to process in a single batch during inference
-
-classes Tags used to train this model
-
-labelCasing Whether to set all labels of the NER model to upper or lower case. Values: upper|lower
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> documentAssembler = DocumentAssembler () \
-... . setInputCol ( "text" ) \
-... . setOutputCol ( "document" )
->>> sentence = SentenceDetector () \
-... . setInputCols ([ "document" ]) \
-... . setOutputCol ( "sentence" )
->>> tokenizer = Tokenizer () \
-... . setInputCols ([ "sentence" ]) \
-... . setOutputCol ( "token" )
->>> embeddings = WordEmbeddingsModel . pretrained () \
-... . setInputCols ([ "sentence" , "token" ]) \
-... . setOutputCol ( "bert" )
->>> nerTagger = MedicalNerModel . pretrained () \
-... . setInputCols ([ "sentence" , "token" , "bert" ]) \
-... . setOutputCol ( "ner" )
->>> pipeline = Pipeline () . setStages ([
-... documentAssembler ,
-... sentence ,
-... tokenizer ,
-... embeddings ,
-... nerTagger
-... ])
->>> data = spark . createDataFrame ([[ "U.N. official Ekeus heads for Baghdad." ]]) . toDF ( "text" )
->>> result = pipeline . fit ( data ) . transform ( data )
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getBatchSize
()
-Gets current batch size.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-getStorageRef
()
-Gets unique reference name for identification.
-
-getTrainingClassDistribution
()
-
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-loadSavedModel
(ner_model_path, folder, ...)
-
-
-pretrained
([name, lang, remote_loc])
-
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setBatchSize
(v)
-Sets batch size.
-
-setConfigProtoBytes
(b)
-Sets configProto from tensorflow, serialized into byte array.
-
-setIncludeConfidence
(value)
-Sets whether to include confidence scores in annotation metadata, by default False.
-
-setInferenceBatchSize
(value)
-Sets number of sentences to process in a single batch during inference
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLabelCasing
(value)
-Setting all labels of the NER models upper/lower case.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-setStorageRef
(value)
-Sets unique reference name for identification.
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-batchSize
-
-
-classes
-
-
-configProtoBytes
-
-
-getter_attrs
-
-
-includeAllConfidenceScores
-
-
-includeConfidence
-
-
-inferenceBatchSize
-
-
-inputCols
-
-
-labelCasing
-
-
-lazyAnnotator
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-storageRef
-
-
-trainingClassDistribution
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getBatchSize ( )
-Gets current batch size.
-
-Returns
-
-int Current batch size
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-getStorageRef ( )
-Gets unique reference name for identification.
-
-Returns
-
-str Unique reference name for identification
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBatchSize ( v )
-Sets batch size.
-
-Parameters
-
-v int Batch size
-
-
-
-
-
-
-
-
-setConfigProtoBytes ( b ) [source]
-Sets configProto from tensorflow, serialized into byte array.
-
-Parameters
-
-b List[int] ConfigProto from tensorflow, serialized into byte array
-
-
-
-
-
-
-
-
-setIncludeConfidence ( value ) [source]
-Sets whether to include confidence scores in annotation metadata, by
-default False.
-
-Parameters
-
-value bool Whether to include the confidence value in the output.
-
-
-
-
-
-
-
-
-setInferenceBatchSize ( value ) [source]
-Sets number of sentences to process in a single batch during inference
-
-Parameters
-
-value int number of sentences to process in a single batch during inference
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLabelCasing ( value ) [source]
-Sets all labels of the NER model to upper or lower case. Values: upper|lower
-
-Parameters
-
-value str Whether to set labels to upper or lower case. Values: upper|lower
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setStorageRef ( value )
-Sets unique reference name for identification.
-
-Parameters
-
-value str Unique reference name for identification
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerChunker.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerChunker.html
deleted file mode 100644
index 9c012212fb..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerChunker.html
+++ /dev/null
@@ -1,1080 +0,0 @@
-
-sparknlp_jsl.annotator.NerChunker — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.NerChunker
-
-
-class sparknlp_jsl.annotator. NerChunker ( classname = 'com.johnsnowlabs.nlp.annotators.ner.NerChunker' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
-
-Extracts phrases that fit a known pattern from the NER tags. Useful for entity groups with neighboring tokens when there is no pretrained NER model to address certain issues. A regex needs to be provided to extract the tokens
-between entities.
-
-
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, NAMED_ENTITY
-CHUNK
-
-
-
-
-Parameters
-
-setRegexParsers A list of regex patterns to match chunks, for example: ["<DT>?<JJ>*<NN>"]
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp_jsl.base import *
->>> from sparknlp.annotator import *
->>> from sparknlp_jsl.annotator import *
->>> from sparknlp.training import *
->>> from pyspark.ml import Pipeline
-
-
->>> document_assembler = DocumentAssembler() \
-...     .setInputCol("text") \
-...     .setOutputCol("document")
->>> sentence_detector = SentenceDetector() \
-...     .setInputCols(["document"]) \
-...     .setOutputCol("sentence")
->>> tokenizer = Tokenizer() \
-...     .setInputCols(["sentence"]) \
-...     .setOutputCol("token")
->>> embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") \
-...     .setInputCols(["sentence", "token"]) \
-...     .setOutputCol("embeddings") \
-...     .setCaseSensitive(False)
->>> ner = MedicalNerModel.pretrained("ner_radiology", "en", "clinical/models") \
-...     .setInputCols(["sentence", "token", "embeddings"]) \
-...     .setOutputCol("ner")
->>> chunker = NerChunker() \
-...     .setInputCols(["sentence", "ner"]) \
-...     .setOutputCol("chunk") \
-...     .setRegexParsers(["<ImagingFindings>.*<BodyPart>"])
->>> pipeline = Pipeline(stages=[
-...     document_assembler,
-...     sentence_detector,
-...     tokenizer,
-...     embeddings,
-...     ner,
-...     chunker
-... ])
->>> result = pipeline.fit(dataset).transform(dataset)
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-setRegexParsers
(b)
-Sets list of regex patterns to match chunks, for example: ["<DT>?<JJ>*<NN>"]
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-regexParsers
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir()
to get all attributes of type
-Param
.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setRegexParsers ( b ) [source]
-Sets list of regex patterns to match chunks, for example: ["<DT>?<JJ>*<NN>"]
-
-Parameters
-
-b List[str] List of regex patterns to match chunks, for example: ["<DT>?<JJ>*<NN>"]
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerConverterInternal.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerConverterInternal.html
deleted file mode 100644
index c6ec1fd043..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerConverterInternal.html
+++ /dev/null
@@ -1,1170 +0,0 @@
-
-sparknlp_jsl.annotator.NerConverterInternal — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.NerConverterInternal
-
-
-class sparknlp_jsl.annotator. NerConverterInternal [source]
-Bases: sparknlp.common.AnnotatorModel
-Converts an IOB or IOB2 representation of NER to a user-friendly one,
-by associating the tokens of recognized entities and their label.
-Chunks with no associated entity (tagged “O”) are filtered.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-DOCUMENT, TOKEN, NAMED_ENTITY
-CHUNK
-
-
-
-
-Parameters
-
-whiteList If defined, list of entities to process. The rest will be ignored. Do not include IOB prefix on labels
-
-blackList If defined, list of entities to ignore. The rest will be processed. Do not include IOB prefix on labels
-
-preservePosition Whether to preserve the original position of the tokens in the original document or use the modified tokens
-
-greedyMode Whether to ignore B tags for contiguous tokens of the same entity
-
-threshold Confidence threshold to filter the chunk entities.
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> data = spark . createDataFrame ([[ "A 63-year-old man presents to the hospital ..." ]]) . toDF ( "text" )
->>> documentAssembler = DocumentAssembler () . setInputCol ( "text" ) . setOutputCol ( "document" )
->>> sentenceDetector = SentenceDetector () . setInputCols ([ "document" ]) . setOutputCol ( "sentence" )
->>> tokenizer = Tokenizer () . setInputCols ([ "sentence" ]) . setOutputCol ( "token" )
->>> embeddings = WordEmbeddingsModel . pretrained ( "embeddings_clinical" , "en" , "clinical/models" ) . setOutputCol ( "embs" )
->>> nerModel = MedicalNerModel . pretrained ( "ner_jsl" , "en" , "clinical/models" ) . setInputCols ([ "sentence" , "token" , "embs" ]) . setOutputCol ( "ner" )
->>> nerConverter = NerConverterInternal () . setInputCols ([ "sentence" , "token" , "ner" ]) . setOutputCol ( "ner_chunk" )
-...
->>> pipeline = Pipeline ( stages = [
-... documentAssembler ,
-... sentenceDetector ,
-... tokenizer ,
-... embeddings ,
-... nerModel ,
-... nerConverter ])
-
-
-Methods
-
-
-
-
-
-
-__init__
()
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setBlackList
(entities)
-If defined, list of entities to ignore.
-
-setGreedyMode
(p)
-Sets whether to ignore B tags for contiguous tokens of the same entity
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-setPreservePosition
(p)
-Sets whether to preserve the original position of the tokens in the original document or use the modified tokens
-
-setReplaceDictResource
(path[, read_as, options])
-Sets replace dictionary pairs
-
-setThreshold
(p)
-Sets confidence threshold to filter the chunk entities.
-
-setWhiteList
(entities)
-If defined, list of entities to process.
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-blackList
-
-
-getter_attrs
-
-
-greedyMode
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-preservePosition
-
-
-replaceDictResource
-
-
-threshold
-
-
-whiteList
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setBlackList ( entities ) [source]
-If defined, list of entities to ignore. The rest will be processed. Do not include IOB prefix on labels
-
-Parameters
-
-entities list If defined, list of entities to ignore. The rest will be processed. Do not include IOB prefix on labels
-
-
-
-
-
-
-
-
-setGreedyMode ( p ) [source]
-Sets whether to ignore B tags for contiguous tokens of the same entity.
-
-Parameters
-
-p bool Whether to ignore B tags for contiguous tokens of the same entity
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setPreservePosition ( p ) [source]
-Sets whether to preserve the original position of the tokens in the original document or to use the modified tokens.
-
-Parameters
-
-p bool Whether to preserve the original position of the tokens in the original document or to use the modified tokens
-
-
-
-
-
-
-
-
-setReplaceDictResource ( path , read_as = 'TEXT' , options = {'delimiter': ','} ) [source]
-Sets the replace dictionary pairs, loaded from an external resource.
-
-Parameters
-
-path str Path to the external resource
-
-read_as str, optional How to read the resource, by default ReadAs.TEXT
-
-options dict, optional Options for reading the resource, by default {"delimiter": ","}
-
-
-
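-A minimal usage sketch, assuming a plain-text resource with one comma-separated pair per line; the file path is hypothetical and `converter` stands for an instance of this annotator:
->>> converter.setReplaceDictResource("replace_dict.csv", read_as="TEXT", options={"delimiter": ","})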
-
-
-
-
-
-setThreshold ( p ) [source]
-Sets confidence threshold to filter the chunk entities.
-
-Parameters
-
-p float Confidence threshold to filter the chunk entities.
-
-
-
-
-
-
-
-
-setWhiteList ( entities ) [source]
-If defined, list of entities to process. The rest will be ignored. Do not include IOB prefix on labels
-
-Parameters
-
-entities list If defined, list of entities to process. The rest will be ignored. Do not include IOB prefix on labels
-
-
-
-
-
-
-
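-A short sketch of the entity filtering setters; `converter` stands for an instance of this annotator and the labels are illustrative (as the docs above note, pass labels without the IOB prefix, e.g. "PER" rather than "B-PER"):
->>> converter.setWhiteList(["PER", "ORG"])  # keep only PER and ORG chunks
->>> converter.setThreshold(0.7)             # drop chunk entities with confidence below 0.7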
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame Input dataset
-
-params dict, optional An optional param map that overrides embedded params
-
-Returns
-
-pyspark.sql.DataFrame Transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerDisambiguator.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerDisambiguator.html
deleted file mode 100644
index 50a0731017..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerDisambiguator.html
+++ /dev/null
@@ -1,1271 +0,0 @@
-
sparknlp_jsl.annotator.NerDisambiguator — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.NerDisambiguator
-
-
-class sparknlp_jsl.annotator. NerDisambiguator [source]
-Bases: sparknlp.common.AnnotatorApproach
-Links words of interest, such as names of persons, locations and companies, from an input text document to
-a corresponding unique entity in a target Knowledge Base (KB). Words of interest are called Named Entities (NEs),
-mentions, or surface forms.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-CHUNK, SENTENCE_EMBEDDINGS
-DISAMBIGUATION
-
-
-
-
-Parameters
-
-embeddingTypeParam Could be 'bow' for word embeddings or 'sentence' for sentence embeddings
-
-numFirstChars How many characters should be considered for the initial prefix search in the knowledge base
-
-tokenSearch Whether to search by token or by chunk in the knowledge base (token is recommended)
-
-narrowWithApproximateMatching Whether to narrow prefix search results with Levenshtein distance based matching (true is recommended)
-
-levenshteinDistanceThresholdParam Levenshtein distance threshold to narrow results from prefix search (0.1 is default)
-
-nearMatchingGapParam Puts a limit on the string length (by trimming the candidate chunks) during Levenshtein-distance based narrowing: len(candidate) - len(entity chunk) > nearMatchingGap (Default: 4)
-
-predictionsLimit Limit on the number of predictions N for top-N predictions
-
-s3KnowledgeBaseName Knowledge base name in S3
-
-
-
-
-Examples
->>> data = spark.createDataFrame([["The show also had a contestant named Donald Trump who later defeated Christina Aguilera ..."]]).toDF("text")
->>> documentAssembler = DocumentAssembler() \
-...     .setInputCol("text") \
-...     .setOutputCol("document")
->>> sentenceDetector = SentenceDetector() \
-...     .setInputCols(["document"]) \
-...     .setOutputCol("sentence")
->>> tokenizer = Tokenizer() \
-...     .setInputCols(["sentence"]) \
-...     .setOutputCol("token")
->>> word_embeddings = WordEmbeddingsModel.pretrained() \
-...     .setInputCols(["sentence", "token"]) \
-...     .setOutputCol("embeddings")
->>> sentence_embeddings = SentenceEmbeddings() \
-...     .setInputCols(["sentence", "embeddings"]) \
-...     .setOutputCol("sentence_embeddings")
->>> ner_model = NerDLModel.pretrained() \
-...     .setInputCols(["sentence", "token", "embeddings"]) \
-...     .setOutputCol("ner")
->>> ner_converter = NerConverter() \
-...     .setInputCols(["sentence", "token", "ner"]) \
-...     .setOutputCol("ner_chunk") \
-...     .setWhiteList(["PER"])
-
-
-Then the extracted entities can be disambiguated.
->>> disambiguator = NerDisambiguator() \
-...     .setS3KnowledgeBaseName("i-per") \
-...     .setInputCols(["ner_chunk", "sentence_embeddings"]) \
-...     .setOutputCol("disambiguation") \
-...     .setNumFirstChars(5)
->>> nlpPipeline = Pipeline(stages=[
-...     documentAssembler,
-...     sentenceDetector,
-...     tokenizer,
-...     word_embeddings,
-...     sentence_embeddings,
-...     ner_model,
-...     ner_converter,
-...     disambiguator])
->>> model = nlpPipeline.fit(data)
->>> result = model.transform(data)
->>> result.selectExpr("explode(disambiguation)") \
-...     .selectExpr("col.metadata.chunk as chunk", "col.result as result").show(5, False)
-
-
-
-
-
-
-+------------------+------------------------------------------------------------------------------------------------------------------------+
-|chunk             |result                                                                                                                  |
-+------------------+------------------------------------------------------------------------------------------------------------------------+
-|Donald Trump      |http://en.wikipedia.org/?curid=4848272, http://en.wikipedia.org/?curid=31698421, http://en.wikipedia.org/?curid=55907961|
-|Christina Aguilera|http://en.wikipedia.org/?curid=144171, http://en.wikipedia.org/?curid=6636454                                           |
-+------------------+------------------------------------------------------------------------------------------------------------------------+
-
-
-
-Methods
-
-
-
-
-
-
-__init__
()
-
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-fit
(dataset[, params])
-Fits a model to the input dataset with optional parameters.
-
-fitMultiple
(dataset, paramMaps)
-Fits a model to the input dataset for each param map in paramMaps .
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setEmbeddingType
(value)
-Sets whether to use 'bow' for word embeddings or 'sentence' for sentence embeddings
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setLevenshteinDistanceThresholdParam
(value)
-Sets Levenshtein distance threshold to narrow results from prefix search (0.1 is default)
-
-setNarrowWithApproximateMatching
(value)
-Sets whether to narrow prefix search results with Levenshtein distance based matching (Default: true)
-
-setNearMatchingGapParam
(value)
-Sets a limit on a string length (by trimming the candidate chunks) during levenshtein-distance based narrowing.
-
-setNumFirstChars
(value)
-Sets how many characters should be considered for the initial prefix search in the knowledge base
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setPredictionLimit
(value)
-Sets the limit on the number of predictions N for top-N predictions
-
-setS3KnowledgeBaseName
(value)
-Sets the knowledge base name in S3
-
-setTokenSearch
(value)
-Sets whether to search by token or by chunk in knowledge base (Default: true)
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-embeddingTypeParam
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-levenshteinDistanceThresholdParam
-
-
-narrowWithApproximateMatching
-
-
-nearMatchingGapParam
-
-
-numFirstChars
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-predictionsLimit
-
-
-s3KnowledgeBaseName
-
-
-tokenSearch
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params, so both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-Returns
-
-JavaParams Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-fit ( dataset , params = None )
-Fits a model to the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame Input dataset
-
-params dict or list or tuple, optional An optional param map that overrides embedded params. If a list/tuple of
-param maps is given, this calls fit on each param map and returns a list of
-models.
-
-Returns
-
-Transformer or a list of Transformer Fitted model(s)
-
-
-
-
-
-
-
-
-fitMultiple ( dataset , paramMaps )
-Fits a model to the input dataset for each param map in paramMaps .
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame Input dataset
-
-paramMaps collections.abc.Sequence A Sequence of param maps
-
-Returns
-
-_FitMultipleIterator A thread safe iterable which contains one model for each param map. Each
-call to next(modelIterator) will return (index, model), where the model was fit
-using paramMaps[index]. The index values may not be sequential.
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setEmbeddingType ( value ) [source]
-Sets whether to use 'bow' for word embeddings or 'sentence' for sentence embeddings.
-
-Parameters
-
-value str Can be 'bow' for word embeddings or 'sentence' for sentences (Default: sentence)
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setLevenshteinDistanceThresholdParam ( value ) [source]
-Sets Levenshtein distance threshold to narrow results from prefix search (0.1 is default)
-
-Parameters
-
-value float Levenshtein distance threshold to narrow results from prefix search (0.1 is default)
-
-
-
-
-
-
-
-
-setNarrowWithApproximateMatching ( value ) [source]
-Sets whether to narrow prefix search results with Levenshtein distance based matching (Default: true)
-
-Parameters
-
-value bool Whether to narrow prefix search results with Levenshtein distance based matching (Default: true)
-
-
-
-
-
-
-
-
-setNearMatchingGapParam ( value ) [source]
-Sets a limit on a string length (by trimming the candidate chunks) during levenshtein-distance based narrowing.
-
-Parameters
-
-value int Limit on a string length (by trimming the candidate chunks) during levenshtein-distance based narrowing
-
-
-
-
-
-
-
-
-setNumFirstChars ( value ) [source]
-Sets how many characters should be considered for the initial prefix search in the knowledge base.
-
-Parameters
-
-value int How many characters should be considered for the initial prefix search in the knowledge base
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setPredictionLimit ( value ) [source]
-Sets the limit on the number of predictions N for top-N predictions.
-
-Parameters
-
-value int Limit on the number of predictions N for top-N predictions
-
-
-
-
-
-
-
-
-setS3KnowledgeBaseName ( value ) [source]
-Sets the knowledge base name in S3.
-
-Parameters
-
-value str Knowledge base name in S3, e.g. 'i-per'
-
-
-
-
-
-
-
-
-setTokenSearch ( value ) [source]
-Sets whether to search by token or by chunk in knowledge base (Default: true)
-
-Parameters
-
-value bool Whether to search by token or by chunk in knowledge base (Default: true)
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerDisambiguatorModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerDisambiguatorModel.html
deleted file mode 100644
index 69a4b6fb6f..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.NerDisambiguatorModel.html
+++ /dev/null
@@ -1,1227 +0,0 @@
-
sparknlp_jsl.annotator.NerDisambiguatorModel — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.NerDisambiguatorModel
-
-
-class sparknlp_jsl.annotator. NerDisambiguatorModel ( classname = 'com.johnsnowlabs.nlp.annotators.disambiguation.NerDisambiguatorModel' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
-Links words of interest, such as names of persons, locations and companies, from an input text document to
-a corresponding unique entity in a target Knowledge Base (KB). Words of interest are called Named Entities (NEs),
-mentions, or surface forms.
-Instantiated / pretrained model of the NerDisambiguator.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-CHUNK, SENTENCE_EMBEDDINGS
-DISAMBIGUATION
-
-
-
-
-Parameters
-
-embeddingTypeParam Could be 'bow' for word embeddings or 'sentence' for sentence embeddings
-
-numFirstChars How many characters should be considered for the initial prefix search in the knowledge base
-
-tokenSearch Whether to search by token or by chunk in the knowledge base (token is recommended)
-
-narrowWithApproximateMatching Whether to narrow prefix search results with Levenshtein distance based matching (true is recommended)
-
-levenshteinDistanceThresholdParam Levenshtein distance threshold to narrow results from prefix search (0.1 is default)
-
-nearMatchingGapParam Puts a limit on the string length (by trimming the candidate chunks) during Levenshtein-distance based narrowing: len(candidate) - len(entity chunk) > nearMatchingGap (Default: 4)
-
-predictionsLimit Limit on the number of predictions N for top-N predictions
-
-s3KnowledgeBaseName Knowledge base name in S3
-
-
-
-
-Examples
->>> data = spark.createDataFrame([["The show also had a contestant named Donald Trump who later defeated Christina Aguilera ..."]]).toDF("text")
->>> documentAssembler = DocumentAssembler() \
-...     .setInputCol("text") \
-...     .setOutputCol("document")
->>> sentenceDetector = SentenceDetector() \
-...     .setInputCols(["document"]) \
-...     .setOutputCol("sentence")
->>> tokenizer = Tokenizer() \
-...     .setInputCols(["sentence"]) \
-...     .setOutputCol("token")
->>> word_embeddings = WordEmbeddingsModel.pretrained() \
-...     .setInputCols(["sentence", "token"]) \
-...     .setOutputCol("embeddings")
->>> sentence_embeddings = SentenceEmbeddings() \
-...     .setInputCols(["sentence", "embeddings"]) \
-...     .setOutputCol("sentence_embeddings")
->>> ner_model = NerDLModel.pretrained() \
-...     .setInputCols(["sentence", "token", "embeddings"]) \
-...     .setOutputCol("ner")
->>> ner_converter = NerConverter() \
-...     .setInputCols(["sentence", "token", "ner"]) \
-...     .setOutputCol("ner_chunk") \
-...     .setWhiteList(["PER"])
-
-
-Then the extracted entities can be disambiguated.
->>> disambiguator = NerDisambiguatorModel.pretrained() \
-...     .setInputCols(["ner_chunk", "sentence_embeddings"]) \
-...     .setOutputCol("disambiguation") \
-...     .setNumFirstChars(5)
->>> nlpPipeline = Pipeline(stages=[
-...     documentAssembler,
-...     sentenceDetector,
-...     tokenizer,
-...     word_embeddings,
-...     sentence_embeddings,
-...     ner_model,
-...     ner_converter,
-...     disambiguator])
->>> model = nlpPipeline.fit(data)
->>> result = model.transform(data)
->>> result.selectExpr("explode(disambiguation)") \
-...     .selectExpr("col.metadata.chunk as chunk", "col.result as result").show(5, False)
-
-
-
-
-
-
-+------------------+------------------------------------------------------------------------------------------------------------------------+
-|chunk             |result                                                                                                                  |
-+------------------+------------------------------------------------------------------------------------------------------------------------+
-|Donald Trump      |http://en.wikipedia.org/?curid=4848272, http://en.wikipedia.org/?curid=31698421, http://en.wikipedia.org/?curid=55907961|
-|Christina Aguilera|http://en.wikipedia.org/?curid=144171, http://en.wikipedia.org/?curid=6636454                                           |
-+------------------+------------------------------------------------------------------------------------------------------------------------+
-
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-pretrained
([name, lang, remote_loc])
-
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setEmbeddingType
(value)
-Sets whether to use 'bow' for word embeddings or 'sentence' for sentence embeddings
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setLevenshteinDistanceThresholdParam
(value)
-Sets Levenshtein distance threshold to narrow results from prefix search (0.1 is default)
-
-setNarrowWithApproximateMatching
(value)
-Sets whether to narrow prefix search results with Levenshtein distance based matching (Default: true)
-
-setNearMatchingGapParam
(value)
-Sets a limit on a string length (by trimming the candidate chunks) during levenshtein-distance based narrowing.
-
-setNumFirstChars
(value)
-Sets how many characters should be considered for the initial prefix search in the knowledge base
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-setPredictionLimit
(value)
-Sets the limit on the number of predictions N for top-N predictions
-
-setTokenSearch
(value)
-Sets whether to search by token or by chunk in knowledge base (Default: true)
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-embeddingTypeParam
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-levenshteinDistanceThresholdParam
-
-
-name
-
-
-narrowWithApproximateMatching
-
-
-nearMatchingGapParam
-
-
-numFirstChars
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-predictionsLimit
-
-
-tokenSearch
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params, so both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-Returns
-
-JavaParams Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setEmbeddingType ( value ) [source]
-Sets whether to use 'bow' for word embeddings or 'sentence' for sentence embeddings.
-
-Parameters
-
-value str Can be 'bow' for word embeddings or 'sentence' for sentences (Default: sentence)
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setLevenshteinDistanceThresholdParam ( value ) [source]
-Sets Levenshtein distance threshold to narrow results from prefix search (0.1 is default)
-
-Parameters
-
-value float Levenshtein distance threshold to narrow results from prefix search (0.1 is default)
-
-
-
-
-
-
-
-
-setNarrowWithApproximateMatching ( value ) [source]
-Sets whether to narrow prefix search results with Levenshtein distance based matching (Default: true)
-
-Parameters
-
-value bool Whether to narrow prefix search results with Levenshtein distance based matching (Default: true)
-
-
-
-
-
-
-
-
-setNearMatchingGapParam ( value ) [source]
-Sets a limit on a string length (by trimming the candidate chunks) during levenshtein-distance based narrowing.
-
-Parameters
-
-value int Limit on a string length (by trimming the candidate chunks) during levenshtein-distance based narrowing
-
-
-
-
-
-
-
-
-setNumFirstChars ( value ) [source]
-Sets how many characters should be considered for the initial prefix search in the knowledge base.
-
-Parameters
-
-value int How many characters should be considered for the initial prefix search in the knowledge base
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setPredictionLimit ( value ) [source]
-Sets the limit on the number of predictions N for top-N predictions.
-
-Parameters
-
-value int Limit on the number of predictions N for top-N predictions
-
-
-
-
-
-
-
-
-setTokenSearch ( value ) [source]
-Sets whether to search by token or by chunk in knowledge base (Default: true)
-
-Parameters
-
-value bool Whether to search by token or by chunk in knowledge base (Default: true)
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame Input dataset
-
-params dict, optional An optional param map that overrides embedded params
-
-Returns
-
-pyspark.sql.DataFrame Transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.PosologyREModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.PosologyREModel.html
deleted file mode 100644
index 9e6f781e45..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.PosologyREModel.html
+++ /dev/null
@@ -1,1078 +0,0 @@
-
sparknlp_jsl.annotator.PosologyREModel — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.PosologyREModel
-
-
-class sparknlp_jsl.annotator. PosologyREModel ( classname = 'com.johnsnowlabs.nlp.annotators.re.PosologyREModel' , java_model = None ) [source]
-Bases: sparknlp_jsl.annotator.RelationExtractionModel
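-PosologyREModel specializes RelationExtractionModel for posology relations (drugs and their dosage, frequency, strength, duration, etc.), so it is configured like its parent. A minimal sketch; the pretrained model name "posology_re" and the column wiring are assumptions based on the method list below, not guaranteed by this page:
->>> posology_re = PosologyREModel.pretrained("posology_re", "en", "clinical/models") \
-...     .setInputCols(["embeddings", "pos_tags", "ner_chunks", "dependencies"]) \
-...     .setOutputCol("relations") \
-...     .setMaxSyntacticDistance(4) \
-...     .setPredictionThreshold(0.5)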
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getClasses
()
-Returns labels used to train this model
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-pretrained
(name[, lang, remote_loc])
-
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setCustomLabels
(labels)
-Sets custom relation labels
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMaxSyntacticDistance
(distance)
-Sets maximal syntactic distance, as threshold (Default: 0)
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-setPredictionThreshold
(threshold)
-Sets the minimal activation of the target unit to encode a new relation instance
-
-setRelationPairs
(pairs)
-Sets the list of dash-separated pairs of named entities ("ENTITY1-ENTITY2", e.g. "Biomarker-RelativeDay") which will be processed
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-classes
-
-
-customLabels
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-maxSyntacticDistance
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-predictionThreshold
-
-
-relationPairs
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params, so both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-Returns
-
-JavaParams Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getClasses ( )
-Returns labels used to train this model
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setCustomLabels ( labels )
-Sets custom relation labels
-
-Parameters
-
-labels dict[str, str] Dictionary which maps old to new labels
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMaxSyntacticDistance ( distance )
-Sets the maximal syntactic distance, as threshold (Default: 0)
-
-Parameters
-
-distance int Maximal syntactic distance, as threshold (Default: 0)
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setPredictionThreshold ( threshold )
-Sets the minimal activation of the target unit to encode a new relation instance.
-
-Parameters
-
-threshold float Minimal activation of the target unit to encode a new relation instance
-
-
-
-
-
-
-
-
-setRelationPairs ( pairs )
-Sets the list of dash-separated pairs of named entities ("ENTITY1-ENTITY2", e.g. "Biomarker-RelativeDay") which will be processed.
-
-Parameters
-
-pairs str List of dash-separated pairs of named entities ("ENTITY1-ENTITY2", e.g. "Biomarker-RelativeDay") which will be processed
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame Input dataset
-
-params dict, optional An optional param map that overrides embedded params
-
-Returns
-
-pyspark.sql.DataFrame Transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RENerChunksFilter.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RENerChunksFilter.html
deleted file mode 100644
index 0dbe84de12..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RENerChunksFilter.html
+++ /dev/null
@@ -1,1145 +0,0 @@
-
sparknlp_jsl.annotator.RENerChunksFilter — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.RENerChunksFilter
-
-
-class sparknlp_jsl.annotator. RENerChunksFilter ( classname = 'com.johnsnowlabs.nlp.annotators.re.RENerChunksFilter' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
-Filters and outputs combinations of relations between extracted entities, for further processing.
-This annotator is especially useful to create inputs for the RelationExtractionDLModel.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-CHUNK, DEPENDENCY
-CHUNK
-
-
-
-
-Parameters
-
-relationPairs List of valid relations to encode
-
-maxSyntacticDistance Maximum syntactic distance between a pair of named entities to consider them as a relation
-
-docLevelRelations Include relations between entities from different sentences (Default: False)
-
-
-
-
-Examples
->>> documenter = DocumentAssembler() \
-...     .setInputCol("text") \
-...     .setOutputCol("document")
->>> sentencer = SentenceDetector() \
-...     .setInputCols(["document"]) \
-...     .setOutputCol("sentences")
->>> tokenizer = Tokenizer() \
-...     .setInputCols(["sentences"]) \
-...     .setOutputCol("tokens")
->>> words_embedder = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") \
-...     .setInputCols(["sentences", "tokens"]) \
-...     .setOutputCol("embeddings")
->>> pos_tagger = PerceptronModel.pretrained("pos_clinical", "en", "clinical/models") \
-...     .setInputCols(["sentences", "tokens"]) \
-...     .setOutputCol("pos_tags")
->>> dependency_parser = DependencyParserModel.pretrained("dependency_conllu", "en") \
-...     .setInputCols(["sentences", "pos_tags", "tokens"]) \
-...     .setOutputCol("dependencies")
->>> clinical_ner_tagger = MedicalNerModel.pretrained("jsl_ner_wip_greedy_clinical", "en", "clinical/models") \
-...     .setInputCols(["sentences", "tokens", "embeddings"]) \
-...     .setOutputCol("ner_tags")
->>> ner_chunker = NerConverter() \
-...     .setInputCols(["sentences", "tokens", "ner_tags"]) \
-...     .setOutputCol("ner_chunks")
->>> # Define the relation pairs and the filter
->>> relationPairs = [
-...     "direction-external_body_part_or_region",
-...     "external_body_part_or_region-direction",
-...     "direction-internal_organ_or_component",
-...     "internal_organ_or_component-direction"
-... ]
->>> re_ner_chunk_filter = RENerChunksFilter() \
-...     .setInputCols(["ner_chunks", "dependencies"]) \
-...     .setOutputCol("re_ner_chunks") \
-...     .setMaxSyntacticDistance(4) \
-...     .setRelationPairs(["internal_organ_or_component-direction"])
->>> trained_pipeline = Pipeline(stages=[
-...     documenter,
-...     sentencer,
-...     tokenizer,
-...     words_embedder,
-...     pos_tagger,
-...     clinical_ner_tagger,
-...     ner_chunker,
-...     dependency_parser,
-...     re_ner_chunk_filter
-... ])
->>> data = spark.createDataFrame([["MRI demonstrated infarction in the upper brain stem , left cerebellum and right basil ganglia"]]).toDF("text")
->>> result = trained_pipeline.fit(data).transform(data)
->>> # Show results
->>> result.selectExpr("explode(re_ner_chunks) as re_chunks") \
-...     .selectExpr("re_chunks.begin", "re_chunks.result", "re_chunks.metadata.entity", "re_chunks.metadata.paired_to") \
-...     .show(6, truncate=False)
-+-----+-------------+---------------------------+---------+
-|begin|result       |entity                     |paired_to|
-+-----+-------------+---------------------------+---------+
-|35   |upper        |Direction                  |41       |
-|41   |brain stem   |Internal_organ_or_component|35       |
-|35   |upper        |Direction                  |59       |
-|59   |cerebellum   |Internal_organ_or_component|35       |
-|35   |upper        |Direction                  |81       |
-|81   |basil ganglia|Internal_organ_or_component|35       |
-+-----+-------------+---------------------------+---------+
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setDocLevelRelations
(docLevelRelations)
-Sets whether to include relations between entities from different sentences
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMaxSyntacticDistance
(distance)
-Sets the maximum syntactic distance between a pair of named entities to consider them as a relation
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-setRelationPairs
(pairs)
-Sets the list of dash-separated pairs of named entities
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-docLevelRelations
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-maxSyntacticDistance
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-relationPairs
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params, so both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-Returns
-
-JavaParams Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setDocLevelRelations ( docLevelRelations ) [source]
-Sets whether to include relations between entities from different sentences
-
-Parameters
-
-docLevelRelations bool Whether to include relations between entities from different sentences
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMaxSyntacticDistance ( distance ) [source]
-Sets the maximum syntactic distance between a pair of named entities to consider them as a relation.
-
-Parameters
-
-distance int Maximum syntactic distance between a pair of named entities to consider them as a relation
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setRelationPairs ( pairs ) [source]
-Sets the list of dash-separated pairs of named entities
-
-Parameters
-
-pairs str List of dash-separated pairs of named entities
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame Input dataset
-
-params dict, optional An optional param map that overrides embedded params
-
-Returns
-
-pyspark.sql.DataFrame Transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ReIdentification.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ReIdentification.html
deleted file mode 100644
index 1c5c95e14c..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.ReIdentification.html
+++ /dev/null
@@ -1,978 +0,0 @@
-
sparknlp_jsl.annotator.ReIdentification — Spark NLP 3.3.0 documentation
-
sparknlp_jsl.annotator.ReIdentification
-
-
-class sparknlp_jsl.annotator. ReIdentification ( classname = 'com.johnsnowlabs.nlp.annotators.deid.ReIdentification' , java_model = None ) [source]
-Bases: sparknlp.common.AnnotatorModel
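-A minimal wiring sketch; the input column names ("aux" and "deidentified", as typically produced by a de-identification stage) are assumptions for illustration:
->>> reidentification = ReIdentification() \
-...     .setInputCols(["aux", "deidentified"]) \
-...     .setOutputCol("original")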
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-getter_attrs
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params, so both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-Returns
-
-JavaParams Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
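-For illustration (standard pyspark.ml behavior, with annotator standing for any instance of this class), a value passed via extra overrides a user-set value, which in turn overrides the default:
->>> pmap = annotator.extractParamMap({annotator.lazyAnnotator: True})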
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RelationExtractionApproach.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RelationExtractionApproach.html
deleted file mode 100644
index 51125d5cb6..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RelationExtractionApproach.html
+++ /dev/null
@@ -1,1364 +0,0 @@
sparknlp_jsl.annotator.RelationExtractionApproach — Spark NLP 3.3.0 documentation
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RelationExtractionDLModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RelationExtractionDLModel.html
deleted file mode 100644
index 0b39a2c6bb..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RelationExtractionDLModel.html
+++ /dev/null
@@ -1,1191 +0,0 @@
sparknlp_jsl.annotator.RelationExtractionDLModel — Spark NLP 3.3.0 documentation
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RelationExtractionModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RelationExtractionModel.html
deleted file mode 100644
index 2d69c03f8a..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.RelationExtractionModel.html
+++ /dev/null
@@ -1,1203 +0,0 @@
sparknlp_jsl.annotator.RelationExtractionModel — Spark NLP 3.3.0 documentation
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.Router.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.Router.html
deleted file mode 100644
index 6c1b1392f0..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.Router.html
+++ /dev/null
@@ -1,1105 +0,0 @@
sparknlp_jsl.annotator.Router — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.Router
-
-
-class sparknlp_jsl.annotator.Router(classname='com.johnsnowlabs.nlp.Router', java_model=None) [source]
-Bases: sparknlp.common.AnnotatorModel
-Converts chunks from a RegexMatcher to chunks with an entity in the metadata.
-Uses the identifier or field as the entity.
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-ANY
-ANY
-
-
-
-
-Parameters
-
-inputType The annotation type to filter by (by default sentence_embeddings). Possible values:
-document|token|wordpiece|word_embeddings|sentence_embeddings|category|date|sentiment|pos|chunk|named_entity|regex|dependency|labeled_dependency|language|keyword
-
-filterFieldsElements The allowed values for the metadata field that is being used
-
-metadataField The key in the metadata dictionary to filter by (by default entity)
-
-
-
-
-Examples
->>> test_data = spark.createDataFrame(sentences).toDF("text")
->>> document = DocumentAssembler().setInputCol("text").setOutputCol("document")
->>> sentence = SentenceDetector().setInputCols("document").setOutputCol("sentence")
->>> regexMatcher = RegexMatcher().setExternalRules("../src/test/resources/regex-matcher/rules2.txt", ",") \
-...     .setInputCols("sentence") \
-...     .setOutputCol("regex") \
-...     .setStrategy("MATCH_ALL")
->>> chunk2Doc = Chunk2Doc().setInputCols("regex").setOutputCol("doc_chunk")
->>> embeddings = BertSentenceEmbeddings.pretrained("sent_small_bert_L2_128") \
-...     .setInputCols("doc_chunk") \
-...     .setOutputCol("bert") \
-...     .setCaseSensitive(False) \
-...     .setMaxSentenceLength(32)
->>> router_name_embeddings = Router() \
-...     .setInputType("sentence_embeddings") \
-...     .setInputCols("bert") \
-...     .setMetadataField("identifier") \
-...     .setFilterFieldsElements(["name"]) \
-...     .setOutputCol("names_embeddings")
->>> router_city_embeddings = Router() \
-...     .setInputType("sentence_embeddings") \
-...     .setInputCols(["bert"]) \
-...     .setMetadataField("identifier") \
-...     .setFilterFieldsElements(["city"]) \
-...     .setOutputCol("cities_embeddings")
->>> router_names = Router() \
-...     .setInputType("chunk") \
-...     .setInputCols("regex") \
-...     .setMetadataField("identifier") \
-...     .setFilterFieldsElements(["name"]) \
-...     .setOutputCol("names_chunks")
->>> pipeline = Pipeline().setStages(
-...     [document, sentence, regexMatcher, chunk2Doc, router_names, embeddings, router_name_embeddings,
-...      router_city_embeddings])
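-
-A hedged continuation, not on the original page: fitting and running the pipeline uses the standard Spark ML calls, after which the routed columns can be inspected.
->>> result = pipeline.fit(test_data).transform(test_data)
->>> result.select("names_embeddings", "cities_embeddings").show(truncate=False)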
-
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setFilterFieldsElements
(value)
-Sets the allowed values for the metadata field that is being used
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setInputType
(value)
-Sets the annotation type to filter by (by default sentence_embeddings)
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMetadataField
(value)
-Sets the key in the metadata dictionary that you want to filter (by default 'entity')
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-filterFieldsElements
-
-
-getter_attrs
-
-
-inputCols
-
-
-inputType
-
-
-lazyAnnotator
-
-
-metadataField
-
-
-name
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setFilterFieldsElements ( value ) [source]
-Sets the allowed values for the metadata field that is being used
-
-Parameters
-
-value list The allowed values for the metadata field that is being used
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setInputType ( value ) [source]
-Sets the annotation type to filter by (by default sentence_embeddings)
-
-Parameters
-
-value str The annotation type to filter by (by default sentence_embeddings)
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMetadataField ( value ) [source]
-Sets the key in the metadata dictionary that you want to filter (by default ‘entity’)
-
-Parameters
-
-value str The key in the metadata dictionary that you want to filter (by default ‘entity’)
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.SentenceEntityResolverApproach.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.SentenceEntityResolverApproach.html
deleted file mode 100644
index d1d7a95c96..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.SentenceEntityResolverApproach.html
+++ /dev/null
@@ -1,1345 +0,0 @@
sparknlp_jsl.annotator.SentenceEntityResolverApproach — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.SentenceEntityResolverApproach
-
-
-class sparknlp_jsl.annotator.SentenceEntityResolverApproach [source]
-Bases: sparknlp.common.AnnotatorApproach, sparknlp_jsl.annotator.SentenceResolverParams
-This class contains all the parameters and methods to train a SentenceEntityResolverModel.
-The model transforms a dataset with Input Annotation type SENTENCE_EMBEDDINGS, coming from e.g.
-[BertSentenceEmbeddings](/docs/en/transformers#bertsentenceembeddings),
-and returns the normalized entity for a particular trained ontology / curated dataset
-(e.g. ICD-10, RxNorm, SNOMED, etc.).
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-SENTENCE_EMBEDDINGS
-ENTITY
-
-
-
-
-Parameters
-
-labelCol Column name for the value we are trying to resolve
-
-normalizedCol Column name for the original, normalized description
-
-pretrainedModelPath Path to an already trained SentenceEntityResolverModel, which is used as a starting point for training the new model.
-
-overrideExistingCodes Whether to override the codes present in a pretrained model with new codes when the training process begins with a pretrained model. Default value is false (keep all the codes).
-
-returnCosineDistances Whether to extract cosine distances (True or False)
-
-aux_label_col Auxiliary label which maps resolved entities to additional labels
-
-useAuxLabel Whether to use the auxiliary label column or not
-
-dropCodesList A list of codes in a pretrained model that will be omitted when the training process begins with a pretrained model
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> documentAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
->>> sentenceDetector = SentenceDetector().setInputCols(["document"]).setOutputCol("sentence")
->>> tokenizer = Tokenizer().setInputCols(["sentence"]).setOutputCol("token")
->>> bertEmbeddings = BertSentenceEmbeddings.pretrained("sent_biobert_pubmed_base_cased") \
-...     .setInputCols(["sentence"]) \
-...     .setOutputCol("embeddings")
->>> snomedTrainingPipeline = Pipeline(stages=[
-...     documentAssembler,
-...     sentenceDetector,
-...     bertEmbeddings,
-... ])
->>> snomedTrainingModel = snomedTrainingPipeline.fit(data)
->>> snomedData = snomedTrainingModel.transform(data).cache()
-
-
->>> bertExtractor = SentenceEntityResolverApproach() \
-...     .setNeighbours(25) \
-...     .setThreshold(1000) \
-...     .setInputCols(["bert_embeddings"]) \
-...     .setNormalizedCol("normalized_text") \
-...     .setLabelCol("label") \
-...     .setOutputCol("snomed_code") \
-...     .setDistanceFunction("EUCLIDEAN") \
-...     .setCaseSensitive(False)
-
-
->>> snomedModel = bertExtractor.fit(snomedData)
-
-
-Methods
-
-
-
-
-
-
-__init__
()
-
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-fit
(dataset[, params])
-Fits a model to the input dataset with optional parameters.
-
-fitMultiple
(dataset, paramMaps)
-Fits a model to the input dataset for each param map in paramMaps .
-
-getCaseSensitive
()
-Gets whether to ignore case in tokens for embeddings matching.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setAuxLabelCol
(name)
-Sets auxiliary label which maps resolved entities to additional labels
-
-setCaseSensitive
(value)
-Sets whether to ignore case in tokens for embeddings matching.
-
-setConfidenceFunction
(s)
-What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-setDistanceFunction
(dist)
-Sets distance function to use for WMD: 'EUCLIDEAN' or 'COSINE'.
-
-setDropCodesList
(value)
-
-
-setExtractCosineDistances
(name)
-Sets whether to extract cosine distances (True or False).
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLabelCol
(name)
-Sets column name for the value we are trying to resolve
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMissAsEmpty
(value)
-Sets whether or not to return an empty annotation on unmatched chunks.
-
-setNeighbours
(k)
-Sets number of neighbours to consider in the KNN query to calculate WMD.
-
-setNormalizedCol
(name)
-Sets column name for the original, normalized description
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setOverrideExistingCodes
(value)
-
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setPretrainedModelPath
(path)
-
-
-setThreshold
(thres)
-Sets Threshold value for the last distance calculated.
-
-setUseAuxLabel
(name)
-Sets whether to use the auxiliary label column or not.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-aux_label_col
-
-
-caseSensitive
-
-
-confidenceFunction
-
-
-distanceFunction
-
-
-dropCodesList
-
-
-getter_attrs
-
-
-inputCols
-
-
-labelCol
-
-
-lazyAnnotator
-
-
-missAsEmpty
-
-
-neighbours
-
-
-normalizedCol
-
-
-outputCol
-
-
-overrideExistingCodes
-
-
-params
-Returns all params ordered by name.
-
-pretrainedModelPath
-
-
-returnCosineDistances
-
-
-threshold
-
-
-useAuxLabel
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-fit ( dataset , params = None )
-Fits a model to the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-params dict or list or tuple, optional an optional param map that overrides embedded params. If a list/tuple of
-param maps is given, this calls fit on each param map and returns a list of
-models.
-
-
-
-Returns
-
-Transformer
or a list of Transformer
fitted model(s)
-
-
-
-
-
-
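-For illustration (standard pyspark.ml behavior), an embedded param can be overridden at fit time; here bertExtractor is the approach configured in the example above:
->>> model = bertExtractor.fit(snomedData, params={bertExtractor.caseSensitive: True})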
-
-
-fitMultiple ( dataset , paramMaps )
-Fits a model to the input dataset for each param map in paramMaps .
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset.
-
-paramMaps collections.abc.Sequence
A Sequence of param maps.
-
-
-
-Returns
-
-_FitMultipleIterator
A thread safe iterable which contains one model for each param map. Each
-call to next(modelIterator) will return (index, model) where model was fit
-using paramMaps[index] . index values may not be sequential.
-
-
-
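-For illustration (standard pyspark.ml behavior, with arbitrary neighbours values), the returned iterator can be consumed like this:
->>> maps = [{bertExtractor.neighbours: 100}, {bertExtractor.neighbours: 500}]
->>> models = [None] * len(maps)
->>> for index, model in bertExtractor.fitMultiple(snomedData, maps):
-...     models[index] = model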
-
-
-
-
-
-getCaseSensitive ( )
-Gets whether to ignore case in tokens for embeddings matching.
-
-Returns
-
-bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setAuxLabelCol ( name ) [source]
-Sets auxiliary label which maps resolved entities to additional labels
-
-Parameters
-
-name str Auxiliary label which maps resolved entities to additional labels
-
-
-
-
-
-
-
-
-setCaseSensitive ( value )
-Sets whether to ignore case in tokens for embeddings matching.
-
-Parameters
-
-value bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-setConfidenceFunction ( s )
-What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-Parameters
-
-s str What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-
-
-
-
-
-
-
-setDistanceFunction ( dist )
-Sets distance function to use for WMD: ‘EUCLIDEAN’ or ‘COSINE’.
-
-Parameters
-
-dist str Value that selects what distance function to use for WMD: ‘EUCLIDEAN’ or ‘COSINE’.
-
-
-
-
-
-
-
-
-setExtractCosineDistances ( name ) [source]
-Sets whether to extract cosine distances (True or False).
-
-Parameters
-
-name bool Whether to extract cosine distances (True or False)
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLabelCol ( name ) [source]
-Sets column name for the value we are trying to resolve
-
-Parameters
-
-name str Column name for the value we are trying to resolve
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMissAsEmpty ( value )
-Sets whether or not to return an empty annotation on unmatched chunks.
-
-Parameters
-
-value bool whether or not to return an empty annotation on unmatched chunks.
-
-
-
-
-
-
-
-
-setNeighbours ( k )
-Sets number of neighbours to consider in the KNN query to calculate WMD.
-
-Parameters
-
-k int Number of neighbours to consider in the KNN query to calculate WMD.
-
-
-
-
-
-
-
-
-setNormalizedCol ( name ) [source]
-Sets column name for the original, normalized description
-
-Parameters
-
-name str Column name for the original, normalized description
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setThreshold ( thres )
-Sets Threshold value for the last distance calculated.
-
-Parameters
-
-thres float Threshold value for the last distance calculated.
-
-
-
-
-
-
-
-
-setUseAuxLabel ( name ) [source]
-Sets whether to use the auxiliary label column or not.
-
-Parameters
-
-name bool Whether to use the auxiliary label column or not
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.SentenceEntityResolverModel.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.SentenceEntityResolverModel.html
deleted file mode 100644
index 0d07298c64..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.SentenceEntityResolverModel.html
+++ /dev/null
@@ -1,1385 +0,0 @@
sparknlp_jsl.annotator.SentenceEntityResolverModel — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.SentenceEntityResolverModel
-
-
-class sparknlp_jsl.annotator.SentenceEntityResolverModel(classname='com.johnsnowlabs.nlp.annotators.resolution.SentenceEntityResolverModel', java_model=None) [source]
-Bases: sparknlp.common.AnnotatorModel, sparknlp.common.HasEmbeddingsProperties, sparknlp.common.HasStorageModel, sparknlp_jsl.annotator.SentenceResolverParams
-This class contains all the parameters and methods to train a SentenceEntityResolverModel.
-The model transforms a dataset with Input Annotation type SENTENCE_EMBEDDINGS, coming from e.g.
-[BertSentenceEmbeddings](/docs/en/transformers#bertsentenceembeddings),
-and returns the normalized entity for a particular trained ontology / curated dataset
-(e.g. ICD-10, RxNorm, SNOMED, etc.).
-
-
-
-
-
-
-Input Annotation types
-Output Annotation type
-
-
-
-SENTENCE_EMBEDDINGS
-ENTITY
-
-
-
-
-Parameters
-
-returnCosineDistances Whether to extract cosine distances (True or False)
-
-aux_label_col Auxiliary label which maps resolved entities to additional labels
-
-useAuxLabel Whether to use the auxiliary label column or not
-
-searchTree Search tree for resolution
-
-
-
-
-Examples
->>> import sparknlp
->>> from sparknlp.base import *
->>> from sparknlp.common import *
->>> from sparknlp.annotator import *
->>> from sparknlp.training import *
->>> import sparknlp_jsl
->>> from sparknlp_jsl.base import *
->>> from sparknlp_jsl.annotator import *
->>> from pyspark.ml import Pipeline
->>> documentAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
->>> sentenceDetector = SentenceDetector().setInputCols(["document"]).setOutputCol("sentence")
->>> tokenizer = Tokenizer().setInputCols(["sentence"]).setOutputCol("token")
->>> bertEmbeddings = BertSentenceEmbeddings.pretrained("sent_biobert_pubmed_base_cased") \
-...     .setInputCols(["sentence"]) \
-...     .setOutputCol("embeddings")
->>> snomedTrainingPipeline = Pipeline(stages=[
-...     documentAssembler,
-...     sentenceDetector,
-...     bertEmbeddings,
-... ])
->>> snomedTrainingModel = snomedTrainingPipeline.fit(data)
->>> snomedData = snomedTrainingModel.transform(data).cache()
-
-
->>> bertExtractor = SentenceEntityResolverApproach() \
-...     .setNeighbours(25) \
-...     .setThreshold(1000) \
-...     .setInputCols(["bert_embeddings"]) \
-...     .setNormalizedCol("normalized_text") \
-...     .setLabelCol("label") \
-...     .setOutputCol("snomed_code") \
-...     .setDistanceFunction("EUCLIDEAN") \
-...     .setCaseSensitive(False)
-
-
->>> snomedModel = bertExtractor.fit(snomedData)
-
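-The methods table lists pretrained(name[, lang, remote_loc]) without a call site; the sketch below is a hedged illustration only, and the model name and column names are assumptions, not taken from this page.
->>> resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_icd10cm", "en", "clinical/models") \
-...     .setInputCols(["sentence_embeddings"]) \
-...     .setOutputCol("icd10cm_code")
->>> resolved = resolver.transform(embeddings_df)  # embeddings_df: any DataFrame with SENTENCE_EMBEDDINGS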
-
-Methods
-
-
-
-
-
-
-__init__
([classname, java_model])
-Initialize this instance with a Java model object.
-
-clear
(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy
([extra])
-Creates a copy of this instance with the same uid and some extra params.
-
-explainParam
(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams
()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap
([extra])
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-getCaseSensitive
()
-Gets whether to ignore case in tokens for embeddings matching.
-
-getDimension
()
-Gets embeddings dimension.
-
-getIncludeStorage
()
-Gets whether to include indexed storage in trained model.
-
-getInputCols
()
-Gets current column names of input annotations.
-
-getLazyAnnotator
()
-Gets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-getOrDefault
(param)
-Gets the value of a param in the user-supplied param map or its default value.
-
-getOutputCol
()
-Gets output column name of annotations.
-
-getParam
(paramName)
-Gets a param by its name.
-
-getParamValue
(paramName)
-Gets the value of a parameter.
-
-getStorageRef
()
-Gets unique reference name for identification.
-
-hasDefault
(param)
-Checks whether a param has a default value.
-
-hasParam
(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined
(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet
(param)
-Checks whether a param is explicitly set by user.
-
-load
(path)
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-loadStorage
(path, spark, storage_ref)
-
-
-loadStorages
(path, spark, storage_ref, databases)
-
-
-pretrained
(name[, lang, remote_loc])
-
-
-read
()
-Returns an MLReader instance for this class.
-
-save
(path)
-Save this ML instance to the given path, a shortcut of 'write().save(path)'.
-
-saveStorage
(path, spark)
-Saves the current model to storage.
-
-set
(param, value)
-Sets a parameter in the embedded param map.
-
-setAuxLabelCol
(name)
-Sets auxiliary label which maps resolved entities to additional labels
-
-setCaseSensitive
(value)
-Sets whether to ignore case in tokens for embeddings matching.
-
-setConfidenceFunction
(s)
-What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-setDimension
(value)
-Sets embeddings dimension.
-
-setDistanceFunction
(dist)
-Sets distance function to use for WMD: 'EUCLIDEAN' or 'COSINE'.
-
-setIncludeStorage
(value)
-Sets whether to include indexed storage in trained model.
-
-setInputCols
(*value)
-Sets column names of input annotations.
-
-setLazyAnnotator
(value)
-Sets whether Annotator should be evaluated lazily in a RecursivePipeline.
-
-setMissAsEmpty
(value)
-Sets whether or not to return an empty annotation on unmatched chunks.
-
-setNeighbours
(k)
-Sets number of neighbours to consider in the KNN query to calculate WMD.
-
-setOutputCol
(value)
-Sets output column name of annotations.
-
-setParamValue
(paramName)
-Sets the value of a parameter.
-
-setParams
()
-
-
-setSearchTree
(s)
-Sets the search tree used for resolution.
-
-setStorageRef
(value)
-Sets unique reference name for identification.
-
-setThreshold
(thres)
-Sets Threshold value for the last distance calculated.
-
-setUseAuxLabel
(name)
-Sets whether to use the auxiliary label column or not.
-
-transform
(dataset[, params])
-Transforms the input dataset with optional parameters.
-
-write
()
-Returns an MLWriter instance for this ML instance.
-
-
-
-Attributes
-
-
-
-
-
-
-aux_label_col
-
-
-caseSensitive
-
-
-confidenceFunction
-
-
-dimension
-
-
-distanceFunction
-
-
-getter_attrs
-
-
-includeStorage
-
-
-inputCols
-
-
-lazyAnnotator
-
-
-missAsEmpty
-
-
-name
-
-
-neighbours
-
-
-outputCol
-
-
-params
-Returns all params ordered by name.
-
-returnCosineDistances
-
-
-searchTree
-
-
-storageRef
-
-
-threshold
-
-
-useAuxLabel
-
-
-
-
-
-
-clear ( param )
-Clears a param from the param map if it has been explicitly set.
-
-
-
-
-copy ( extra = None )
-Creates a copy of this instance with the same uid and some
-extra params. This implementation first calls Params.copy and
-then makes a copy of the companion Java pipeline component with
-extra params. So both the Python wrapper and the Java pipeline
-component get copied.
-
-Parameters
-
-extra dict, optional Extra parameters to copy to the new instance
-
-
-
-Returns
-
-JavaParams
Copy of this instance
-
-
-
-
-
-
-
-
-explainParam ( param )
-Explains a single param and returns its name, doc, and optional
-default value and user-supplied value in a string.
-
-
-
-
-explainParams ( )
-Returns the documentation of all params with their optionally
-default values and user-supplied values.
-
-
-
-
-extractParamMap ( extra = None )
-Extracts the embedded default param values and user-supplied
-values, and then merges them with extra values from input into
-a flat param map, where the latter value is used if there exist
-conflicts, i.e., with ordering: default param values <
-user-supplied values < extra.
-
-Parameters
-
-extra dict, optional extra param values
-
-
-
-Returns
-
-dict merged param map
-
-
-
-
-
-
-
-
-getCaseSensitive ( )
-Gets whether to ignore case in tokens for embeddings matching.
-
-Returns
-
-bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-getDimension ( )
-Gets embeddings dimension.
-
-
-
-
-getIncludeStorage ( )
-Gets whether to include indexed storage in trained model.
-
-Returns
-
-bool Whether to include indexed storage in trained model
-
-
-
-
-
-
-
-
-getInputCols ( )
-Gets current column names of input annotations.
-
-
-
-
-getLazyAnnotator ( )
-Gets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-
-
-
-getOrDefault ( param )
-Gets the value of a param in the user-supplied param map or its
-default value. Raises an error if neither is set.
-
-
-
-
-getOutputCol ( )
-Gets output column name of annotations.
-
-
-
-
-getParam ( paramName )
-Gets a param by its name.
-
-
-
-
-getParamValue ( paramName )
-Gets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-getStorageRef ( )
-Gets unique reference name for identification.
-
-Returns
-
-str Unique reference name for identification
-
-
-
-
-
-
-
-
-hasDefault ( param )
-Checks whether a param has a default value.
-
-
-
-
-hasParam ( paramName )
-Tests whether this instance contains a param with a given
-(string) name.
-
-
-
-
-isDefined ( param )
-Checks whether a param is explicitly set by user or has
-a default value.
-
-
-
-
-isSet ( param )
-Checks whether a param is explicitly set by user.
-
-
-
-
-classmethod load ( path )
-Reads an ML instance from the input path, a shortcut of read().load(path) .
-
-
-
-
-property params
-Returns all params ordered by name. The default implementation
-uses dir() to get all attributes of type Param.
-
-
-
-
-classmethod read ( )
-Returns an MLReader instance for this class.
-
-
-
-
-save ( path )
-Save this ML instance to the given path, a shortcut of ‘write().save(path)’.
-
-
-
-
-saveStorage ( path , spark )
-Saves the current model to storage.
-
-Parameters
-
-path str Path for saving the model.
-
-spark pyspark.sql.SparkSession
The current SparkSession
-
-
-
-
-
-
-
-
-set ( param , value )
-Sets a parameter in the embedded param map.
-
-
-
-
-setAuxLabelCol ( name ) [source]
-Sets auxiliary label which maps resolved entities to additional labels
-
-Parameters
-
-name str Auxiliary label which maps resolved entities to additional labels
-
-
-
-
-
-
-
-
-setCaseSensitive ( value )
-Sets whether to ignore case in tokens for embeddings matching.
-
-Parameters
-
-value bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-setConfidenceFunction ( s )
-What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-Parameters
-
-s str What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-
-
-
-
-
-
-
-setDimension ( value )
-Sets embeddings dimension.
-
-Parameters
-
-value int Embeddings dimension
-
-
-
-
-
-
-
-
-setDistanceFunction ( dist )
-Sets distance function to use for WMD: ‘EUCLIDEAN’ or ‘COSINE’.
-
-Parameters
-
-dist str Value that selects what distance function to use for WMD: ‘EUCLIDEAN’ or ‘COSINE’.
-
-
-
-
-
-
-
-
-setIncludeStorage ( value )
-Sets whether to include indexed storage in trained model.
-
-Parameters
-
-value bool Whether to include indexed storage in trained model
-
-
-
-
-
-
-
-
-setInputCols ( * value )
-Sets column names of input annotations.
-
-Parameters
-
-*value str Input columns for the annotator
-
-
-
-
-
-
-
-
-setLazyAnnotator ( value )
-Sets whether Annotator should be evaluated lazily in a
-RecursivePipeline.
-
-Parameters
-
-value bool Whether Annotator should be evaluated lazily in a
-RecursivePipeline
-
-
-
-
-
-
-
-
-setMissAsEmpty ( value )
-Sets whether or not to return an empty annotation on unmatched chunks.
-
-Parameters
-
-value bool whether or not to return an empty annotation on unmatched chunks.
-
-
-
-
-
-
-
-
-setNeighbours ( k )
-Sets number of neighbours to consider in the KNN query to calculate WMD.
-
-Parameters
-
-k int Number of neighbours to consider in the KNN query to calculate WMD.
-
-
-
-
-
-
-
-
-setOutputCol ( value )
-Sets output column name of annotations.
-
-Parameters
-
-value str Name of output column
-
-
-
-
-
-
-
-
-setParamValue ( paramName )
-Sets the value of a parameter.
-
-Parameters
-
-paramName str Name of the parameter
-
-
-
-
-
-
-
-
-setSearchTree ( s ) [source]
-Sets the search tree used for resolution.
-
-Parameters
-
-s Search tree for resolution
-
-
-
-
-
-
-
-
-setStorageRef ( value )
-Sets unique reference name for identification.
-
-Parameters
-
-value str Unique reference name for identification
-
-
-
-
-
-
-
-
-setThreshold ( thres )
-Sets Threshold value for the last distance calculated.
-
-Parameters
-
-thres float Threshold value for the last distance calculated.
-
-
-
-
-
-
-
-
-setUseAuxLabel ( name ) [source]
-Sets whether to use the auxiliary label column or not.
-
-Parameters
-
-name bool Whether to use the auxiliary label column or not
-
-
-
-
-
-
-
-
-transform ( dataset , params = None )
-Transforms the input dataset with optional parameters.
-
-
-Parameters
-
-dataset pyspark.sql.DataFrame
input dataset
-
-params dict, optional an optional param map that overrides embedded params.
-
-
-
-Returns
-
-pyspark.sql.DataFrame
transformed dataset
-
-
-
-
-
-
-
-
-uid
-A unique id for the object.
-
-
-
-
-write ( )
-Returns an MLWriter instance for this ML instance.
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.SentenceResolverParams.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.SentenceResolverParams.html
deleted file mode 100644
index e3548614f5..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.SentenceResolverParams.html
+++ /dev/null
@@ -1,769 +0,0 @@
sparknlp_jsl.annotator.SentenceResolverParams — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator.SentenceResolverParams
-
-
-class sparknlp_jsl.annotator.SentenceResolverParams [source]
-Bases: sparknlp.common.HasCaseSensitiveProperties
-Class used to provide a common interface for the Sentence Resolver family.
-
-Parameters
-
-distanceFunction What distance function to use for WMD: ‘EUCLIDEAN’ or ‘COSINE’.
-
-neighbours Number of neighbours to consider in the KNN query to calculate WMD
-
-threshold Threshold value for the last distance calculated.
-
-confidenceFunction What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-missAsEmpty Whether or not to return an empty annotation on unmatched chunks.
-
-
-
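-A hedged sketch of how these shared parameters are typically configured on a member of the family (the values are illustrative, not defaults):
->>> resolver = SentenceEntityResolverApproach() \
-...     .setDistanceFunction("COSINE") \
-...     .setNeighbours(500) \
-...     .setThreshold(5.0) \
-...     .setConfidenceFunction("INVERSE") \
-...     .setMissAsEmpty(True)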
-
-Methods
-
-
-
-
-
-
-__init__
(*args, **kwargs)
-
-
-getCaseSensitive
()
-Gets whether to ignore case in tokens for embeddings matching.
-
-setCaseSensitive
(value)
-Sets whether to ignore case in tokens for embeddings matching.
-
-setConfidenceFunction
(s)
-What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-setDistanceFunction
(dist)
-Sets distance function to use for WMD: 'EUCLIDEAN' or 'COSINE'.
-
-setMissAsEmpty
(value)
-Sets whether or not to return an empty annotation on unmatched chunks.
-
-setNeighbours
(k)
-Sets number of neighbours to consider in the KNN query to calculate WMD.
-
-setThreshold
(thres)
-Sets Threshold value for the last distance calculated.
-
-
-
-Attributes
-
-
-
-
-
-
-caseSensitive
-
-
-confidenceFunction
-
-
-distanceFunction
-
-
-missAsEmpty
-
-
-neighbours
-
-
-threshold
-
-
-
-
-
-
-getCaseSensitive ( )
-Gets whether to ignore case in tokens for embeddings matching.
-
-Returns
-
-bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-setCaseSensitive ( value )
-Sets whether to ignore case in tokens for embeddings matching.
-
-Parameters
-
-value bool Whether to ignore case in tokens for embeddings matching
-
-
-
-
-
-
-
-
-setConfidenceFunction ( s ) [source]
-What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-Parameters
-
-s str What function to use to calculate confidence: INVERSE or SOFTMAX.
-
-
-
-
-
-
-
-
-setDistanceFunction ( dist ) [source]
-Sets distance function to use for WMD: ‘EUCLIDEAN’ or ‘COSINE’.
-
-Parameters
-
-dist str Value that selects what distance function to use for WMD: ‘EUCLIDEAN’ or ‘COSINE’.
-
-
-
-
-
-
-
-
-setMissAsEmpty ( value ) [source]
-Sets whether or not to return an empty annotation on unmatched chunks.
-
-Parameters
-
-value bool whether or not to return an empty annotation on unmatched chunks.
-
-
-
-
-
-
-
-
-setNeighbours ( k ) [source]
-Sets number of neighbours to consider in the KNN query to calculate WMD.
-
-Parameters
-
-k int Number of neighbours to consider in the KNN query to calculate WMD.
-
-
-
-
-
-
-
-
-setThreshold ( thres ) [source]
-Sets Threshold value for the last distance calculated.
-
-Parameters
-
-thres float Threshold value for the last distance calculated.
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.html
deleted file mode 100644
index b4342eaabd..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.annotator.html
+++ /dev/null
@@ -1,746 +0,0 @@
sparknlp_jsl.annotator — Spark NLP 3.3.0 documentation
sparknlp_jsl.annotator
-
Classes
-
-
-
-
-
-
-AnnotationMerger
-Merges Annotations from multiple columns.
-
-AssertionDLApproach
-Train a Assertion Model algorithm using deep learning.
-
-AssertionDLModel
-AssertionDL is a deep learning based approach used to extract Assertion Status from extracted entities and text.
-
-AssertionFilterer
-Filters entities coming from ASSERTION type annotations and returns the CHUNKS.
-
-AssertionLogRegApproach
-Trains an assertion status model using logistic regression.
-
-AssertionLogRegModel
-This is the main class in the AssertionLogReg family. Logistic regression is used to extract Assertion Status
-
-AverageEmbeddings
-
-
-BertSentenceChunkEmbeddings
-BERT Sentence embeddings for chunk annotations which take into account the context of the sentence the chunk appeared in.
-
-Chunk2Token
-
-
-ChunkConverter
-Converts chunks from a RegexMatcher to chunks with an entity in the metadata.
-
-ChunkFilterer
-Model that filters entities coming from CHUNK annotations. Filters can be set via a white list of terms or a regular expression.
-
-ChunkFiltererApproach
-Model that filters entities coming from CHUNK annotations. Filters can be set via a white list of terms or a regular expression.
-
-ChunkKeyPhraseExtraction
-Chunk KeyPhrase Extraction uses Bert Sentence Embeddings to determine the most relevant key phrases describing a text.
-
-ChunkMergeApproach
-Merges two chunk columns coming from two annotators (NER, ContextualParser or any other annotator producing chunks).
-
-ChunkMergeModel
-The model produced by ChunkMergeApproach.
-
-ChunkSentenceSplitter
-Splits the document using the provided chunks and puts the chunk entity in the metadata.
-
-CommonResolverParams
-Class used to provide a common interface for the Entity Resolver family.
-
-ContextualParserApproach
-Creates a model that extracts entities from a document based on user-defined rules.
-
-ContextualParserModel
-Extracts entities from a document based on user-defined rules.
-
-DateNormalizer
-Tries to normalize dates in chunk annotations.
-
-DeIdentification
-Contains all the methods for training a DeIdentificationModel model.
-
-DeIdentificationModel
-The DeIdentificationModel model can obfuscate or mask the entities that contain personal information.
-
-DocumentLogRegClassifierApproach
-Trains a model to classify documents with a logistic regression algorithm.
-
-DocumentLogRegClassifierModel
-Classifies documents with a logistic regression algorithm.
-
-DrugNormalizer
-Annotator which normalizes raw text from clinical documents, e.g. scraped web pages or xml documents, from document type columns into Sentence.
-
-EntityChunkEmbeddings
-Weighted average embeddings of multiple named entity chunk annotations
-
-GenericClassifierApproach
-Trains a TensorFlow model for generic classification of feature vectors.
-
-GenericClassifierModel
-Generic classifier of feature vectors.
-
-IOBTagger
-Merges token tags and NER labels from chunks in the specified format.
-
-MedicalBertForSequenceClassification
-MedicalBertForSequenceClassification can load BERT models with a sequence classification/regression head on top (a linear layer on top of the pooled output) e.g.
-
-MedicalBertForTokenClassifier
-MedicalBertForTokenClassifier can load Bert Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
-
-MedicalDistilBertForSequenceClassification
-MedicalDistilBertForSequenceClassification can load DistilBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g.
-
-MedicalNerApproach
-This Named Entity recognition annotator allows the training of a generic NER model based on neural networks.
-
-MedicalNerModel
-This Named Entity recognition annotator is a generic NER model based on Neural Networks.
-
-NerChunker
-Extracts phrases that fit into a known pattern using the NER tags. Useful for entity groups with neighboring tokens.
-
-NerConverterInternal
-Converts an IOB or IOB2 representation of NER to a user-friendly one, by associating the tokens of recognized entities and their label.
-
-NerDisambiguator
-Links words of interest, such as names of persons, locations and companies, from an input text document to a corresponding unique entity in a target Knowledge Base (KB).
-
-NerDisambiguatorModel
-Links words of interest, such as names of persons, locations and companies, from an input text document to a corresponding unique entity in a target Knowledge Base (KB).
-
-PosologyREModel
-
-
-RENerChunksFilter
-Filters and outputs combinations of relations between extracted entities, for further processing.
-
-ReIdentification
-
-
-RelationExtractionApproach
-Trains a TensorFlow model for relation extraction.
-
-RelationExtractionDLModel
-Extracts and classifies instances of relations between named entities.
-
-RelationExtractionModel
-A trained TensorFlow model for relation extraction.
-
-Router
-Converts chunks from a RegexMatcher to chunks with an entity in the metadata.
-
-SentenceEntityResolverApproach
-This class contains all the parameters and methods to train a SentenceEntityResolverModel.
-
-SentenceEntityResolverModel
-This class contains all the parameters and methods to train a SentenceEntityResolverModel.
-
-SentenceResolverParams
-Class used to provide a common interface for the Sentence Resolver family.
-
-
-
-
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.base.FeaturesAssembler.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.base.FeaturesAssembler.html
deleted file mode 100644
index fef3e80bdf..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.base.FeaturesAssembler.html
+++ /dev/null
@@ -1,918 +0,0 @@
sparknlp_jsl.base.FeaturesAssembler — Spark NLP 3.3.0 documentation
sparknlp_jsl.base.FeaturesAssembler
-
-
-class sparknlp_jsl.base.FeaturesAssembler [source]
-Bases: sparknlp.internal.AnnotatorTransformer
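-The FeaturesAssembler is used to collect feature columns into a single FEATURE_VECTOR column. This page carries no description or example, so the sketch below is a hedged, minimal illustration with assumed column names, e.g. as input to GenericClassifierApproach.
->>> features = FeaturesAssembler() \
-...     .setInputCols(["feature_1", "feature_2", "feature_3"]) \
-...     .setOutputCol("features")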
-Methods
-
-
-
-
-
-
-__init__()
-clear(param): Clears a param from the param map if it has been explicitly set.
-copy([extra]): Creates a copy of this instance with the same uid and some extra params.
-explainParam(param): Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-explainParams(): Returns the documentation of all params with their optionally default values and user-supplied values.
-extractParamMap([extra]): Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-getOrDefault(param): Gets the value of a param in the user-supplied param map or its default value.
-getParam(paramName): Gets a param by its name.
-getParamValue(paramName): Gets the value of a parameter.
-hasDefault(param): Checks whether a param has a default value.
-hasParam(paramName): Tests whether this instance contains a param with a given (string) name.
-isDefined(param): Checks whether a param is explicitly set by user or has a default value.
-isSet(param): Checks whether a param is explicitly set by user.
-load(path): Reads an ML instance from the input path, a shortcut of read().load(path).
-read(): Returns an MLReader instance for this class.
-save(path): Save this ML instance to the given path, a shortcut of write().save(path).
-set(param, value): Sets a parameter in the embedded param map.
-setInputCols(value): Sets the input column names.
-setOutputCol(value): Sets the output column name.
-setParamValue(paramName): Sets the value of a parameter.
-setParams()
-transform(dataset[, params]): Transforms the input dataset with optional parameters.
-write(): Returns an MLWriter instance for this ML instance.
-
-Attributes
-
-getter_attrs
-inputCols
-name
-outputCol
-params: Returns all params ordered by name.
-
-clear(param)
-Clears a param from the param map if it has been explicitly set.
-
-copy(extra=None)
-Creates a copy of this instance with the same uid and some extra params. This implementation first calls Params.copy and then makes a copy of the companion Java pipeline component with extra params, so both the Python wrapper and the Java pipeline component get copied.
-
-Parameters:
-extra : dict, optional
-Extra parameters to copy to the new instance
-
-Returns:
-JavaParams
-Copy of this instance
-
-explainParam(param)
-Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.
-
-explainParams()
-Returns the documentation of all params with their optionally default values and user-supplied values.
-
-extractParamMap(extra=None)
-Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra.
-
-Parameters:
-extra : dict, optional
-extra param values
-
-Returns:
-dict
-merged param map
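-
-A small sketch of the precedence rule above (a hypothetical illustration; the param values are assumptions, not from this page):
-
-asm = FeaturesAssembler().setOutputCol("features")       # user-supplied value
-# extra values win over user-supplied ones, which win over defaults
-pmap = asm.extractParamMap({asm.outputCol: "features_v2"})
-# pmap[asm.outputCol] is now "features_v2"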
-
-getOrDefault(param)
-Gets the value of a param in the user-supplied param map or its default value. Raises an error if neither is set.
-
-getParam(paramName)
-Gets a param by its name.
-
-getParamValue(paramName)
-Gets the value of a parameter.
-
-Parameters:
-paramName : str
-Name of the parameter
-
-hasDefault(param)
-Checks whether a param has a default value.
-
-hasParam(paramName)
-Tests whether this instance contains a param with a given (string) name.
-
-isDefined(param)
-Checks whether a param is explicitly set by user or has a default value.
-
-isSet(param)
-Checks whether a param is explicitly set by user.
-
-classmethod load(path)
-Reads an ML instance from the input path, a shortcut of read().load(path).
-
-property params
-Returns all params ordered by name. The default implementation uses dir() to get all attributes of type Param.
-
-classmethod read()
-Returns an MLReader instance for this class.
-
-save(path)
-Save this ML instance to the given path, a shortcut of write().save(path).
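-
-A minimal sketch of the persistence round trip described above (the path is illustrative):
-
-from sparknlp_jsl.base import FeaturesAssembler
-
-asm = FeaturesAssembler().setInputCols(["embeddings"]).setOutputCol("features")
-asm.write().save("/tmp/features_asm")                    # or asm.save(...), the shortcut
-restored = FeaturesAssembler.load("/tmp/features_asm")   # shortcut of read().load(path)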
-
-set(param, value)
-Sets a parameter in the embedded param map.
-
-setInputCols(value) [source]
-Sets the input column names.
-
-Parameters:
-value : str
-Name of the input column
-
-setOutputCol(value) [source]
-Sets the output column name.
-
-Parameters:
-value : str
-Name of the output column
-
-setParamValue(paramName)
-Sets the value of a parameter.
-
-Parameters:
-paramName : str
-Name of the parameter
-
-transform(dataset, params=None)
-Transforms the input dataset with optional parameters.
-
-Parameters:
-dataset : pyspark.sql.DataFrame
-input dataset
-params : dict, optional
-an optional param map that overrides embedded params
-
-Returns:
-pyspark.sql.DataFrame
-transformed dataset
-
-uid
-A unique id for the object.
-
-write()
-Returns an MLWriter instance for this ML instance.
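-
-A minimal usage sketch, assuming an active SparkSession with the licensed sparknlp_jsl library and a DataFrame emb_df that already holds an "embeddings" annotation column (both names are assumptions):
-
-from sparknlp_jsl.base import FeaturesAssembler
-
-# Collect vectors from the embeddings column into a single feature
-# column that downstream estimators can consume.
-features_asm = FeaturesAssembler() \
-    .setInputCols(["embeddings"]) \
-    .setOutputCol("features")
-
-result = features_asm.transform(emb_df)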
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.base.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.base.html
deleted file mode 100644
index 94f9481c8d..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.base.html
+++ /dev/null
@@ -1,590 +0,0 @@
-sparknlp_jsl.base — Spark NLP 3.3.0 documentation
-sparknlp_jsl.base
-
-Classes
-
-FeaturesAssembler
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.AnnotationToolJsonReader.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.AnnotationToolJsonReader.html
deleted file mode 100644
index 93fdf4d099..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.AnnotationToolJsonReader.html
+++ /dev/null
@@ -1,642 +0,0 @@
-sparknlp_jsl.training.AnnotationToolJsonReader — Spark NLP 3.3.0 documentation
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.CantemistReader.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.CantemistReader.html
deleted file mode 100644
index c99d60640a..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.CantemistReader.html
+++ /dev/null
@@ -1,620 +0,0 @@
-sparknlp_jsl.training.CantemistReader — Spark NLP 3.3.0 documentation
-sparknlp_jsl.training.CantemistReader
-
-class sparknlp_jsl.training.CantemistReader(scheme='IOB') [source]
-Bases: sparknlp.internal.ExtendedJavaWrapper
-
-Methods
-
-__init__([scheme])
-apply()
-new_java_array(pylist, java_class): TODO: inspired by Spark 2.0.
-new_java_array_integer(pylist)
-new_java_array_string(pylist)
-new_java_obj(java_class, *args)
-readDatasetTaskNer(spark, textFolder)
-
-new_java_array(pylist, java_class)
-TODO: inspired by Spark 2.0. Review if Spark changes.
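-
-A minimal sketch of loading the Cantemist NER task with this reader, following the readDatasetTaskNer signature above (assumes an active SparkSession named spark; the folder path is illustrative):
-
-from sparknlp_jsl.training import CantemistReader
-
-reader = CantemistReader(scheme="IOB")
-# readDatasetTaskNer takes the session and the folder containing the task files
-train_df = reader.readDatasetTaskNer(spark, "cantemist/train-set/")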
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.CodiEspReader.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.CodiEspReader.html
deleted file mode 100644
index 5b47257d0c..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.CodiEspReader.html
+++ /dev/null
@@ -1,620 +0,0 @@
-sparknlp_jsl.training.CodiEspReader — Spark NLP 3.3.0 documentation
-sparknlp_jsl.training.CodiEspReader
-
-class sparknlp_jsl.training.CodiEspReader(scheme='IOB') [source]
-Bases: sparknlp.internal.ExtendedJavaWrapper
-
-Methods
-
-__init__([scheme])
-apply()
-new_java_array(pylist, java_class): TODO: inspired by Spark 2.0.
-new_java_array_integer(pylist)
-new_java_array_string(pylist)
-new_java_obj(java_class, *args)
-readDatasetTaskX(spark, path, textFolder[, sep])
-
-new_java_array(pylist, java_class)
-TODO: inspired by Spark 2.0. Review if Spark changes.
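-
-A minimal sketch for the CodiEsp task, following the readDatasetTaskX signature above (assumes an active SparkSession named spark; the file paths and separator are illustrative):
-
-from sparknlp_jsl.training import CodiEspReader
-
-reader = CodiEspReader(scheme="IOB")
-# session, annotation file, folder with the raw texts, optional separator
-df = reader.readDatasetTaskX(spark, "codiesp/trainX.tsv", "codiesp/text-files/", sep="\t")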
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.html
deleted file mode 100644
index 3ecded3df9..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.html
+++ /dev/null
@@ -1,596 +0,0 @@
-sparknlp_jsl.training — Spark NLP 3.3.0 documentation
-sparknlp_jsl.training
-
-Classes
-
-AnnotationToolJsonReader
-CantemistReader
-CodiEspReader
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.tf_graph.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.tf_graph.html
deleted file mode 100644
index 1eb7d57da9..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.tf_graph.html
+++ /dev/null
@@ -1,188 +0,0 @@
-sparknlp_jsl.training.tf_graph — Spark NLP 3.3.0 documentation
-sparknlp_jsl.training.tf_graph
-
-Factory class to create the different TensorFlow graphs for the ner_dl, generic_classifier, assertion_dl and relation_extraction annotators in Spark NLP Healthcare.
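-
-A sketch of driving this factory to generate a custom NER graph. The build and print_model_params helpers and their parameters are assumptions based on the Spark NLP Healthcare docs, not shown on this page; the build_params values are illustrative:
-
-from sparknlp_jsl.training import tf_graph
-
-# List the parameters the ner_dl graph accepts (assumed helper)
-tf_graph.print_model_params("ner_dl")
-
-# Build a graph file for the NER training annotator (assumed helper)
-tf_graph.build(
-    "ner_dl",
-    build_params={"embeddings_dim": 200, "nchars": 83, "ntags": 12},
-    model_location="./medical_ner_graphs",
-    model_filename="auto",
-)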
\ No newline at end of file
diff --git a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.tf_graph_1x.html b/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.tf_graph_1x.html
deleted file mode 100644
index 59ac48a769..0000000000
--- a/docs/licensed/api/python/reference/autosummary/_autosummary/sparknlp_jsl.training.tf_graph_1x.html
+++ /dev/null
@@ -1,188 +0,0 @@
-sparknlp_jsl.training.tf_graph_1x — Spark NLP 3.3.0 documentation
-sparknlp_jsl.training.tf_graph_1x
-
-Factory class to create the different TensorFlow graphs for the ner_dl, generic_classifier, assertion_dl and relation_extraction annotators in Spark NLP Healthcare.
\ No newline at end of file