From 4217ca7f6a8f179e8d48b594226c3c37f773c2d7 Mon Sep 17 00:00:00 2001 From: Jamarri Green <94587378+greenjam19@users.noreply.github.com> Date: Fri, 8 Dec 2023 18:55:49 -0500 Subject: [PATCH] Created yake_example.txt Co-Authored-By: Konain Qureshi --- ACMAS/app/ACMAS_Web/yake_example.txt | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 ACMAS/app/ACMAS_Web/yake_example.txt diff --git a/ACMAS/app/ACMAS_Web/yake_example.txt b/ACMAS/app/ACMAS_Web/yake_example.txt new file mode 100644 index 0000000..4b32fc3 --- /dev/null +++ b/ACMAS/app/ACMAS_Web/yake_example.txt @@ -0,0 +1,34 @@ +Example: +=============================================================================== +language = "en" +max_ngram_size = 1 +deduplication_threshold = 0.9 +numOfKeywords = 10 + +custom_kw_extractor = yake.KeywordExtractor(lan=language, n=max_ngram_size, + dedupLim=deduplication_threshold, top=numOfKeywords, features=None) + +question = "What is 1 plus 1?" + +keywords = custom_kw_extractor.extract_keywords(question) +=============================================================================== + + +Keywords would contain the most relevant keywords to the question. For example, the question "How do I find the integral of a function" would extract to: + +(Integral,0.013) +(function,0.016) +(how,0.03) +(find,0.4) + +where the number within each tuple is the relevance score of that word (in Yake, a lower score means a more relevant keyword). + +The parameters of the Yake program can be tweaked to obtain different keywords. + +lan ------- used to set the language of the extractor +n --------- used to extract words that are within the max_ngram_size +dedupLim -- the similarity parameter that only accepts words that fall within that threshold. +top ------- used to obtain the top x keywords +features -- a parameter to fine-tune the Yake model; it is used to define the way to extract keywords with a specific algorithm, e.g. 
KPF or Keyword Performance Factor + +Additional help can be found on the Yake GitHub: https://github.com/LIAAD/yake/tree/master/yake