diff --git a/README.md b/README.md
index 7d438e832..3a1133ae8 100644
--- a/README.md
+++ b/README.md
@@ -247,9 +247,11 @@ Thanks to the nginx container, we can have a local server for sitemap :
 
 ```
 docker compose up -d nginx # used to scrap sitemap locally - a figaro like website with only 3 news
-pytest test # "test" is the folder containing tests
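+# To debug inside the container instead: switch the test entrypoint to the "sleep" one in docker-compose.yml,
+# then run "docker compose up test" and "docker exec -it test bash" before calling pytest: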
+pytest -vv --log-level DEBUG test # "test" is the folder containing tests
 # Only one test
-pytest -k 'mediatree' 
+pytest -vv --log-level DEBUG -k detect
 # OR
 docker compose up test # test is the container name running pytest test
 ```
diff --git a/docker-compose.yml b/docker-compose.yml
index 5c3f6ddfe..23d2a1c5b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -5,8 +5,8 @@ services:
     build:
       context: ./
       dockerfile: Dockerfile
-    entrypoint: ["poetry", "run", "pytest","-vv", "--cov-report", "term:skip-covered", "--cov=quotaclimat", "--cov=postgres", "test/"]
-    # entrypoint: ["sleep", "12000"] # use to debug the container if needed
+    entrypoint: ["poetry", "run", "pytest","-vv", "-o", "log_cli=true", "--cov-report", "term:skip-covered", "--cov=quotaclimat", "--cov=postgres", "test/"]
+    #entrypoint: ["sleep", "12000"] # use to debug the container if needed
     environment:
       ENV: docker
       # CHANNEL: "fr3-idf"
@@ -24,6 +24,7 @@ services:
       - ./postgres/:/app/postgres/
       - ./test/:/app/test/
       - ./app.py:/app/app.py
+      - ./pyproject.toml:/app/pyproject.toml
     depends_on:
       nginxtest:
         condition: service_healthy
diff --git a/postgres/insert_data.py b/postgres/insert_data.py
index cf5385438..fe85f1740 100644
--- a/postgres/insert_data.py
+++ b/postgres/insert_data.py
@@ -16,7 +16,7 @@ def clean_data(df: pd.DataFrame):
 # from https://stackoverflow.com/a/69421596/3535853
 def insert_or_do_nothing_on_conflict(table, conn, keys, data_iter):
     data = [dict(zip(keys, row)) for row in data_iter]
-    logging.debug("data_iter %s", data)
+
     insert_statement = insert(table.table).values(data)
 
     on_duplicate_key_stmt = insert_statement.on_conflict_do_update(
@@ -24,7 +24,6 @@ def insert_or_do_nothing_on_conflict(table, conn, keys, data_iter):
         set_={c.key: c for c in insert_statement.excluded},
     )
 
-    logging.debug("insert_statement %s", on_duplicate_key_stmt)
     return conn.execute(on_duplicate_key_stmt)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 83ed54010..d8e4771fd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,9 @@ name = "pypi-public"
 url = "https://pypi.org/simple/"
 priority = "primary"
 
+[tool.pytest.ini_options]
+log_cli = 1
+log_cli_level = "DEBUG"
 
 [[tool.poetry.source]]
 name = "PyPI"
diff --git a/quotaclimat/data_processing/mediatree/api_import.py b/quotaclimat/data_processing/mediatree/api_import.py
index b31b72fe5..25784401a 100644
--- a/quotaclimat/data_processing/mediatree/api_import.py
+++ b/quotaclimat/data_processing/mediatree/api_import.py
@@ -8,7 +8,7 @@
 import sys
 import os
 from quotaclimat.utils.healthcheck_config import run_health_check_server
-from quotaclimat.utils.logger import CustomFormatter
+from quotaclimat.utils.logger import getLogger
 from quotaclimat.data_processing.mediatree.utils import *
 from quotaclimat.data_processing.mediatree.config import *
 from quotaclimat.data_processing.mediatree.update_pg_keywords import *
@@ -214,9 +214,8 @@ def parse_number_pages(response_sub) -> int :
 
 def parse_reponse_subtitle(response_sub, channel = None) -> Optional[pd.DataFrame]:
     with sentry_sdk.start_transaction(op="task", name="parse_reponse_subtitle"):
-        logging.debug(f"Parsing json response:\n {response_sub}")
-        
         total_results = parse_total_results(response_sub)
+        logging.getLogger("modin.logger.default").setLevel(logging.WARNING)
         if(total_results > 0):
             logging.info(f"{total_results} 'total_results' field")
             
@@ -230,8 +229,7 @@ def parse_reponse_subtitle(response_sub, channel = None) -> Optional[pd.DataFram
             new_df.rename(columns={'channel.name':'channel_name', 'channel.radio': 'channel_radio', 'timestamp':'start'}, inplace=True)
 
             log_dataframe_size(new_df, channel)
-
-            logging.debug("Parsed %s" % (new_df.head(1).to_string()))
+            
             logging.debug("Parsed Schema\n%s", new_df.dtypes)
             
             return new_df
@@ -267,17 +265,7 @@ async def main():
     sys.exit(0)
 
 if __name__ == "__main__":
-    # create logger with 'spam_application'
-    logger = logging.getLogger()
-    logger.setLevel(level=os.getenv('LOGLEVEL', 'INFO').upper())
-
-    # create console handler with a higher log level
-    if (logger.hasHandlers()):
-        logger.handlers.clear()
-    ch = logging.StreamHandler()
-    ch.setFormatter(CustomFormatter())
-    logger.addHandler(ch)
-
+    getLogger()
     asyncio.run(main())
     sys.exit(0)
 
diff --git a/quotaclimat/data_processing/mediatree/detect_keywords.py b/quotaclimat/data_processing/mediatree/detect_keywords.py
index 0fa36ca75..2029ed758 100644
--- a/quotaclimat/data_processing/mediatree/detect_keywords.py
+++ b/quotaclimat/data_processing/mediatree/detect_keywords.py
@@ -12,7 +12,7 @@
 import sentry_sdk
 import modin.pandas as pd
 import dask
-
+from quotaclimat.utils.logger import getLogger
 dask.config.set({'dataframe.query-planning': True})
 
 def get_cts_in_ms_for_keywords(subtitle_duration: List[dict], keywords: List[str], theme: str) -> List[dict]:
@@ -49,7 +49,7 @@ def format_word_regex(word: str) -> str:
 def is_word_in_sentence(words: str, sentence: str) -> bool :
     # words can contain plurals and several words
     words = ' '.join(list(map(( lambda x: format_word_regex(x)), words.split(" "))))
-    logging.debug(f"testing {words}")
+
     #  test https://regex101.com/r/ilvs9G/1/
     if re.search(rf"\b{words}(?![\w-])", sentence, re.IGNORECASE):
         logging.debug(f"words {words} found in {sentence}")
@@ -57,10 +57,42 @@ def is_word_in_sentence(words: str, sentence: str) -> bool :
     else:
         return False
 
-# some keywords are contained inside other keywords, we need to filter them
 
-def filter_keyword_with_same_timestamp(keywords_with_timestamp: List[dict]) -> List[dict]:
-    # Group keywords by timestamp
+def set_timestamp_with_margin(keywords_with_timestamp: List[dict]) -> List[dict]:
+    """Align the timestamps of adjacent keywords when one keyword contains the other.
+
+    Consecutive entries less than 1000 ms apart, where the shorter keyword is a
+    substring of the longer one, both take the timestamp of the longer keyword.
+    The list is modified in place and returned.
+    """
+    for current, following in zip(keywords_with_timestamp, keywords_with_timestamp[1:]):
+        current_timestamp = current.get("timestamp")
+        next_timestamp = following.get("timestamp")
+        if current_timestamp is None or next_timestamp is None:
+            continue
+        # Entries are not always chronological: use the absolute gap so that a
+        # keyword listed after a much later one is not treated as "close".
+        if abs(next_timestamp - current_timestamp) >= 1000:
+            continue
+        current_keyword = current.get("keyword")
+        next_keyword = following.get("keyword")
+        if len(current_keyword) > len(next_keyword):
+            shortest_word, longest_word = next_keyword, current_keyword
+            timestamp_to_change = current_timestamp
+        else:
+            shortest_word, longest_word = current_keyword, next_keyword
+            timestamp_to_change = next_timestamp
+        if shortest_word in longest_word:
+            logging.info(f"Close keywords - we group them {shortest_word} - {longest_word}")
+            current["timestamp"] = following["timestamp"] = timestamp_to_change
+    return keywords_with_timestamp
+
+# some keywords are contained inside other keywords, we need to filter them
+def filter_keyword_with_same_timestamp(keywords_with_timestamp: List[dict])-> List[dict]:
+    logging.debug(f"Filtering keywords with same timestamp with a margin of one second")
+    number_of_keywords = len(keywords_with_timestamp) 
+    keywords_with_timestamp = set_timestamp_with_margin(keywords_with_timestamp)
+    # Group keywords by timestamp - nested keywords less than 1 second apart were given the same timestamp above
     grouped_keywords = {timestamp: list(group) for timestamp, group in groupby(keywords_with_timestamp, key=lambda x: x['timestamp'])}
 
     # Filter out keywords with the same timestamp and keep the longest keyword
@@ -68,6 +100,10 @@ def filter_keyword_with_same_timestamp(keywords_with_timestamp: List[dict]) -> L
         max(group, key=lambda x: len(x['keyword']))
         for group in grouped_keywords.values()
     ]
+    final_result = len(result)
+
+    if final_result < number_of_keywords:
+        logging.info(f"Filtering keywords {final_result} out of {number_of_keywords} | {keywords_with_timestamp} with final result {result}")
 
     return result
 
diff --git a/quotaclimat/utils/logger.py b/quotaclimat/utils/logger.py
index 4763307f7..ed7848f8f 100644
--- a/quotaclimat/utils/logger.py
+++ b/quotaclimat/utils/logger.py
@@ -1,5 +1,5 @@
 import logging
-
+import os
 class CustomFormatter(logging.Formatter):
 
     grey = "\x1b[38;20m"
@@ -21,4 +21,17 @@ class CustomFormatter(logging.Formatter):
     def format(self, record):
         log_fmt = self.FORMATS.get(record.levelno)
         formatter = logging.Formatter(log_fmt)
-        return formatter.format(record)
\ No newline at end of file
+        return formatter.format(record)
+    
+def getLogger():
+    # Configure the root logger; its level comes from the LOGLEVEL env variable (default INFO)
+    logger = logging.getLogger()
+    logger.setLevel(level=os.getenv('LOGLEVEL', 'INFO').upper())
+    # Drop any handlers already attached so a single stream handler using CustomFormatter remains
+    if (logger.hasHandlers()):
+        logger.handlers.clear()
+    ch = logging.StreamHandler()
+    ch.setFormatter(CustomFormatter())
+    logger.addHandler(ch)
+
+    return logger
\ No newline at end of file
diff --git a/test/sitemap/test_detect_keywords.py b/test/sitemap/test_detect_keywords.py
index d4166f3b8..b3c6f47d3 100644
--- a/test/sitemap/test_detect_keywords.py
+++ b/test/sitemap/test_detect_keywords.py
@@ -67,6 +67,12 @@ def test_get_themes_keywords_duration():
      ,"adaptation_climatique_solutions_directes"
     ],[], 0]
 
+
+    assert get_themes_keywords_duration("il rencontre aussi une crise majeure de la pénurie de l' offre laetitia jaoude des barrages sauvages", subtitles, start) == [[
+      "changement_climatique_consequences"
+     ,"atténuation_climatique_solutions_directes"
+    ],[], 0]
+
 def test_get_cts_in_ms_for_keywords():
     str = [{
           "duration_ms": 34,
@@ -310,7 +316,7 @@ def test_complexe_filter_and_tag_by_theme():
             "text": "planète"
             },{
             "duration_ms": 34,
-            "cts_in_ms":original_timestamp + 10,
+            "cts_in_ms": original_timestamp + get_keyword_time_separation_ms(),
             "text": "conditions"
             },{
             "duration_ms": 34,
@@ -354,16 +360,16 @@ def test_complexe_filter_and_tag_by_theme():
         ],
         "keywords_with_timestamp": [{
                 "keyword" : 'habitabilité de la planète',
-                "timestamp": 1706437079006, # count for one
+                "timestamp": original_timestamp_first_keyword, # count for one
                 "theme":"changement_climatique_constat",
             },
             {
                 "keyword" : 'conditions de vie sur terre',
-                "timestamp": 1706437079010, # timestamp too close
+                "timestamp": original_timestamp + get_keyword_time_separation_ms(), # timestamp too close
                 "theme":"changement_climatique_constat",
             }
         ]
-        ,"number_of_keywords": 1
+        ,"number_of_keywords": 2
     }])
 
     # List of words to filter on
@@ -614,7 +620,7 @@ def test_keyword_inside_keyword_filter_keyword_with_same_timestamp():
     
     assert filter_keyword_with_same_timestamp(keywords_with_timestamp) == expected
 
-def test_keyword_inside_keyword_filter_keyword_with_same_timestamp():
+def test_keyword_2words_inside_keyword_filter_keyword_with_same_timestamp():
     keywords_with_timestamp = [{
                 "keyword" : 'agriculture',
                 "timestamp": original_timestamp,
@@ -636,6 +642,59 @@ def test_keyword_inside_keyword_filter_keyword_with_same_timestamp():
 
     assert filter_keyword_with_same_timestamp(keywords_with_timestamp) == expected
 
+# the longest keyword is kept even when the shorter one, timestamped 960 ms later, is listed first
+def test_keyword_second_word_a_bit_later_inside_keyword_filter_keyword_with_same_timestamp():
+    later_timestamp = original_timestamp + 960 # from real data
+    keywords_with_timestamp = [{
+                "keyword" : 'carbone',
+                "timestamp": later_timestamp,
+                "theme":"changement_climatique_causes_directes",
+            },
+            {
+                "keyword" : 'béton bas carbone',
+                "timestamp": original_timestamp, # less than 1000 ms from 'carbone', so we take longest keyword
+                "theme":"atténuation_climatique_solutions_directes", # different theme, keep this one
+            }
+    ]
+
+    expected = [{
+                "keyword" : 'béton bas carbone',
+                "timestamp": original_timestamp, # less than 1000 ms from 'carbone', so we take longest keyword
+                "theme":"atténuation_climatique_solutions_directes", # different theme, keep this one
+            }
+    ]
+
+    assert filter_keyword_with_same_timestamp(keywords_with_timestamp) == expected
+
+# keywords about 5 seconds apart and not contained in one another are both kept
+def test_keyword_second_word_to_keep_inside_keyword_filter_keyword_with_same_timestamp():
+    keywords_with_timestamp = [{
+                    "theme": "changement_climatique_consequences",
+                    "timestamp": 1707627703040,
+                    "keyword": "pénurie"
+            },
+            {
+                "theme":"atténuation_climatique_solutions_directes", # different theme, kept as well
+                "timestamp": 1707627708051,
+                "keyword": "barrages"
+            },
+    ]
+
+    expected = [
+        {
+                "keyword": "pénurie",
+                "timestamp": 1707627703040,
+                "theme": "changement_climatique_consequences",
+        },
+        {
+            "keyword" : 'barrages',
+            "timestamp": 1707627708051, # 5011 ms after 'pénurie', so this keyword is kept
+            "theme":"atténuation_climatique_solutions_directes", # different theme, kept as well
+        }
+    ]
+
+    assert filter_keyword_with_same_timestamp(keywords_with_timestamp) == expected
+
 def test_filter_keyword_with_same_timestamp():
     keywords_with_timestamp = [{ #nothing to filter
                 "keyword" : "période la plus chaude",
@@ -644,7 +703,7 @@ def test_filter_keyword_with_same_timestamp():
             },
             {
                 "keyword" : "élévation du niveau de la mer",
-                "timestamp": original_timestamp + 1,
+                "timestamp": original_timestamp + 1200, # gap greater than the 1000 ms margin
                 "theme":"changement_climatique_consequences",
             }
     ]
diff --git a/test/sitemap/test_main_import_api.py b/test/sitemap/test_main_import_api.py
index 5f0949811..9174017d7 100644
--- a/test/sitemap/test_main_import_api.py
+++ b/test/sitemap/test_main_import_api.py
@@ -51,11 +51,12 @@ def test_second_row_api_import():
         primary_key = "67b9cc593516b40f55d6a3e89b377fccc8ab76d263c5fd6d4bfe379626190641"
         specific_keyword = get_keyword(primary_key)
         assert specific_keyword.theme == [
-        "changement_climatique_constat",
-        "changement_climatique_causes_indirectes",
-        "changement_climatique_consequences",
-        "atténuation_climatique_solutions_directes"
+            "changement_climatique_constat",
+            "changement_climatique_causes_indirectes",
+            "changement_climatique_consequences",
+            "atténuation_climatique_solutions_directes"
         ]
+
         assert specific_keyword.keywords_with_timestamp == [ # from metabase to speedup check
             {
                 "keyword": "écologique",
@@ -67,11 +68,6 @@ def test_second_row_api_import():
                 "timestamp": 1707627631076,
                 "theme": "changement_climatique_constat"
             },
-            {
-                "keyword": "pétrole",
-                "timestamp": 1707627629004,
-                "theme": "changement_climatique_causes_indirectes"
-            },
             {
                 "keyword": "puits de pétrole",
                 "timestamp": 1707627628054,
@@ -92,7 +88,7 @@ def test_second_row_api_import():
                 "timestamp": 1707627686004,
                 "theme": "atténuation_climatique_solutions_directes"
             }
-            ]
+        ]
         assert specific_keyword.number_of_keywords == 4
 
 def test_third_row_api_import():
diff --git a/test/sitemap/test_mediatree_utils.py b/test/sitemap/test_mediatree_utils.py
index 3d659b295..7b6677649 100644
--- a/test/sitemap/test_mediatree_utils.py
+++ b/test/sitemap/test_mediatree_utils.py
@@ -2,7 +2,7 @@
 import pandas as pd
 
 from utils import get_localhost
-from quotaclimat.data_processing.mediatree.utils import get_yesterday, get_date_range, get_start_end_date_env_variable_with_default, is_it_tuesday
+from quotaclimat.data_processing.mediatree.utils import *
 
 import logging
 from time import strftime,localtime