diff --git a/src/nlpsig/data_preparation.py b/src/nlpsig/data_preparation.py index 9024fd9..de47afd 100644 --- a/src/nlpsig/data_preparation.py +++ b/src/nlpsig/data_preparation.py @@ -49,7 +49,10 @@ def __init__( pooled_embeddings: np.array | None = None, id_column: str | None = None, label_column: str | None = None, + verbose: bool = True, ): + self.verbose = verbose + # perform checks that original_df have the right column names to work with if embeddings.ndim != 2: raise ValueError("`embeddings` should be a 2-dimensional array.") @@ -79,6 +82,7 @@ def __init__( # obtain modelling dataframe self.df: pd.DataFrame | None = None self.df = self._get_modeling_dataframe() + # set pooled embeddings if provided if pooled_embeddings is not None: if pooled_embeddings.ndim != 2: @@ -86,10 +90,11 @@ def __init__( "If provided, `pooled_embeddings` should be a 2-dimensional array." ) if len(self.df[self.id_column].unique()) != pooled_embeddings.shape[0]: - print( - f"[INFO] `len(self.df[self.id_column].unique())`={len(self.df[self.id_column].unique())}" - f" and `pooled_embeddings.shape[0]`={pooled_embeddings.shape[0]}." - ) + if self.verbose: + print( + f"[INFO] `len(self.df[self.id_column].unique())`={len(self.df[self.id_column].unique())}" + f" and `pooled_embeddings.shape[0]`={pooled_embeddings.shape[0]}." + ) raise ValueError( "If provided, `pooled_embeddings` should have the same number " "of rows as there are different ids in the id-column." @@ -123,17 +128,21 @@ def _get_modeling_dataframe(self) -> pd.DataFrame: if self.df is not None: return self.df - print("[INFO] Concatenating the embeddings to the dataframe...") - print("[INFO] - columns beginning with 'e' denote the full embddings.") + if self.verbose: + print("[INFO] Concatenating the embeddings to the dataframe...") + print("[INFO] - columns beginning with 'e' denote the full embddings.") + embedding_df = pd.DataFrame( self.embeddings, columns=[f"e{i+1}" for i in range(self.embeddings.shape[1])], ) if self.embeddings_reduced is not None: - print( - "[INFO] - columns beginning with 'd' denote the dimension reduced embeddings." - ) + if self.verbose: + print( + "[INFO] - columns beginning with 'd' denote the dimension reduced embeddings." + ) + embeddings_reduced_df = pd.DataFrame( self.embeddings_reduced, columns=[f"d{i+1}" for i in range(self.embeddings_reduced.shape[1])], @@ -151,17 +160,21 @@ def _get_modeling_dataframe(self) -> pd.DataFrame: [self.original_df.reset_index(drop=True), embedding_df], axis=1, ) + if self.id_column is None: self.id_column = "dummy_id" - print( - f"[INFO] No id_column was passed, so setting id_column to '{self.id_column}'." - ) + if self.verbose: + print( + f"[INFO] No id_column was passed, so setting id_column to '{self.id_column}'." + ) + if self.id_column not in self.original_df.columns: + if self.verbose: + print( + f"[INFO] There is no column in `.original_df` called '{self.id_column}'. " + f"Adding a new column named '{self.id_column}' of zeros." + ) # set default value to id_column - print( - f"[INFO] There is no column in `.original_df` called '{self.id_column}'. " - f"Adding a new column named '{self.id_column}' of zeros." - ) df[self.id_column] = 0 return df @@ -203,9 +216,13 @@ def _set_time_features(self) -> pd.DataFrame: Updated dataframe with time features. 
""" if self.time_features_added: - print("Time features have already been added.") + if self.verbose: + print("Time features have already been added.") return None - print("[INFO] Adding time feature columns into dataframe in `.df`.") + + if self.verbose: + print("[INFO] Adding time feature columns into dataframe in `.df`.") + if "datetime" in self.df.columns: self._feature_list += ["time_encoding", "time_diff"] @@ -213,7 +230,9 @@ def _set_time_features(self) -> pd.DataFrame: self.df["datetime"] = pd.to_datetime(self.df["datetime"]) # obtain time encoding by computing the fraction of year it is in - print("[INFO] Adding 'time_encoding' feature...") + if self.verbose: + print("[INFO] Adding 'time_encoding' feature...") + self.df["time_encoding"] = self.df["datetime"].map( lambda t: self._time_fraction(t) ) @@ -224,7 +243,9 @@ def _set_time_features(self) -> pd.DataFrame: self.df = self.df.sort_values(by=[self.id_column, "datetime"]) # calculate time difference between posts - print("[INFO] Adding 'time_diff' feature...") + if self.verbose: + print("[INFO] Adding 'time_diff' feature...") + self.df["time_diff"] = list( self.df.groupby(self.id_column) .apply( @@ -240,18 +261,22 @@ def _set_time_features(self) -> pd.DataFrame: .explode() ) else: - print( - "[INFO] Note 'datetime' is not a column in `.df`, " - "so only 'timeline_index' is added." - ) - print( - "[INFO] As 'datetime' is not a column in `.df`, " - "we assume that the data is ordered by time with respect to the id." - ) + if self.verbose: + print( + "[INFO] Note 'datetime' is not a column in `.df`, " + "so only 'timeline_index' is added." + ) + print( + "[INFO] As 'datetime' is not a column in `.df`, " + "we assume that the data is ordered by time with respect to the id." + ) + # assign index for each post in each timeline self._feature_list += ["timeline_index"] - print("[INFO] Adding 'timeline_index' feature...") + if self.verbose: + print("[INFO] Adding 'timeline_index' feature...") + self.df["timeline_index"] = list( self.df.groupby(self.id_column) .apply(lambda x: list(range(1, len(x) + 1))) @@ -756,9 +781,11 @@ def pad( dimension reduced embeddings, time features) """ - print( - "[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes." - ) + if self.verbose: + print( + "[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes." + ) + if pad_by not in ["id", "history"]: raise ValueError("`pad_by` must be either 'id' or 'history'.") @@ -1017,11 +1044,13 @@ def get_torch_path_for_SWNUNetwork( if include_embedding_in_input: # repeat the embeddings which will be concatenated to the path later if self.pad_method == "id": - print( - f"[INFO] The path was created for each {self.id_column} in the dataframe, " - "so to include embeddings in the FFN input, we concatenate the " - "pooled embeddings." - ) + if self.verbose: + print( + f"[INFO] The path was created for each {self.id_column} in the dataframe, " + "so to include embeddings in the FFN input, we concatenate the " + "pooled embeddings." + ) + if self.pooled_embeddings is None: raise ValueError( "There were no pooled embeddings passed into the class." @@ -1035,11 +1064,13 @@ def get_torch_path_for_SWNUNetwork( ) emb = torch.from_numpy(self.pooled_embeddings.astype("float")).float() elif self.pad_method == "history": - print( - "[INFO] The path was created for each item in the dataframe, " - "by looking at its history, so to include embeddings in the FFN input, " - "we concatenate the embeddings for each sentence / text." 
- ) + if self.verbose: + print( + "[INFO] The path was created for each item in the dataframe, " + "by looking at its history, so to include embeddings in the FFN input, " + "we concatenate the embeddings for each sentence / text." + ) + if reduced_embeddings: if self.embeddings_reduced is None: raise ValueError( @@ -1148,10 +1179,11 @@ def check_history_length_for_SeqSigNet( required_history_length = shift * n + (window_size - shift) if self.array_padded.shape[1] != required_history_length: # required history length not met - print( - f"A history length of size {required_history_length} is required, " - f"but we have history length size of {self.array_padded.shape[1]}" - ) + if self.verbose: + print( + f"A history length of size {required_history_length} is required, " + f"but we have history length size of {self.array_padded.shape[1]}" + ) return False # we have the required history length diff --git a/src/nlpsig/encode_text.py b/src/nlpsig/encode_text.py index f0bfeea..dc2ed0b 100644 --- a/src/nlpsig/encode_text.py +++ b/src/nlpsig/encode_text.py @@ -1,6 +1,7 @@ from __future__ import annotations import pickle +import warnings from typing import Callable, Iterable import numpy as np @@ -93,10 +94,13 @@ def __init__( model_modules: Iterable[nn.Module] | None = None, model_encoder_args: dict | None = None, model_fit_args: dict | None = None, + verbose: bool = True, ): + self.verbose = verbose + self.df = df if feature_name not in df.columns: - raise KeyError(f"{feature_name} is not a column in df") + raise KeyError(f"{feature_name} is not a column in df.") self.feature_name = feature_name self.sentence_embeddings = None self.model_name = model_name @@ -142,7 +146,7 @@ def load_pre_computed_embeddings(self, pre_computed_embeddings_file: str) -> Non raise ValueError( f"the loaded embeddings from {pre_computed_embeddings_file} " "must be a (n x d) array where n is the number of sentences " - "and d is the dimension of the embeddings" + "and d is the dimension of the embeddings." ) self.model_name = "pre-computed" self.model_modules = None @@ -170,14 +174,15 @@ def load_pretrained_model(self, force_reload: bool = False) -> None: See https://www.sbert.net/docs/pretrained_models.html for examples. """ if (not force_reload) and (self.model is not None): - print(f"[INFO] '{self.model_name}' model is already loaded") + warnings.warn(f"'{self.model_name}' model is already loaded.", stacklevel=3) return + if (force_reload) and (self.model == "pre-computed"): - print( - "[INFO] The current embeddings were computed before " - "and were loaded into this class" + warnings.warn( + "The current embeddings were pre-computed and loaded.", stacklevel=3 ) return + try: self.model = SentenceTransformer(model_name_or_path=self.model_name) except Exception as err: @@ -208,18 +213,20 @@ def load_custom_model(self, force_reload: bool = False) -> None: for examples. """ if (not force_reload) and (self.model is not None): - print(f"[INFO] '{self.model_name}' model is already loaded") + warnings.warn(f"'{self.model_name}' model is already loaded.", stacklevel=3) return + if (force_reload) and (self.model == "pre-computed"): - print( - "[INFO] The current embeddings were computed before " - "and were loaded into this class" + warnings.warn( + "The current embeddings were pre-computed and loaded.", stacklevel=3 ) return + if self.model_modules is None: raise ValueError( "`.model_modules` must be a list of modules which define the network architecture." 
) + try: self.model = SentenceTransformer(modules=self.model_modules) except Exception as err: @@ -250,10 +257,14 @@ def obtain_embeddings(self) -> np.array: "or `.load_custom_model()` methods first" ) sentences = self.df[self.feature_name].to_list() - print(f"[INFO] number of sentences to encode: {len(sentences)}") + + if self.verbose: + print(f"[INFO] number of sentences to encode: {len(sentences)}") + self.sentence_embeddings = np.array( self.model.encode(sentences, **self.model_encoder_args) ) + return self.sentence_embeddings def fit_transformer( @@ -327,7 +338,10 @@ def __init__( config: PretrainedConfig | None = None, tokenizer: PreTrainedTokenizer | None = None, data_collator: DataCollator | None = None, + verbose: bool = True, ): + self.verbose = verbose + # check feature name is a string or list of length 1 or 2 of strings if isinstance(feature_name, str): # convert to list of one element @@ -405,19 +419,23 @@ def load_pretrained_model(self, force_reload: bool = False) -> None: Whether or not to overwrite current loaded model, by default False. """ if (not force_reload) and (self.model is not None): - print(f"[INFO] '{self.model_name}' model is already loaded.") + warnings.warn(f"'{self.model_name}' model is already loaded.", stacklevel=3) return + if self.model_name is None: raise TypeError("") + + if self.verbose: + print( + "[INFO] By default, `.load_pretrained_model()` uses " + "`AutoModel` to load in the model. " + "If you want to load the model for a specific task, " + "reset the `.model` attribute." + ) + self.config = AutoConfig.from_pretrained(self.model_name) self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) self.data_collator = DataCollatorWithPadding(self.tokenizer) - Warning( - "[INFO] By default, `.load_pretrained_model()` uses " - "`AutoModel` to load in the model. " - "If you want to load the model for a specific task, " - "reset the `.model` attribute." - ) self.model = AutoModel.from_pretrained(self.model_name) self.model.eval() @@ -434,10 +452,12 @@ def initialise_transformer(self, force_reload: bool = False, **config_args) -> N Passed along to `AutoConfig.from_pretrained()` method. 
""" if (not force_reload) and (self.model is not None): - print(f"[INFO] '{self.model_name}' model is already loaded.") + warnings.warn(f"'{self.model_name}' model is already loaded.", stacklevel=3) return + if self.model_name is None: raise TypeError("") + self.config = AutoConfig.from_pretrained(self.model_name, **config_args) self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) self.data_collator = DataCollatorWithPadding(self.tokenizer) @@ -533,8 +553,11 @@ def tokenize_text( # by default does not perform padding initially, # as will utilise dynamic padding later on tokenizer_args = {"padding": False, "truncation": True} + if not tokenizer_args.get("return_special_tokens_mask"): - print("[INFO] Setting return_special_tokens_mask=True") + if self.verbose: + print("[INFO] Setting return_special_tokens_mask=True") + tokenizer_args["return_special_tokens_mask"] = True # define tokenize_function for mapping to Dataset object @@ -556,7 +579,9 @@ def tokenize_function(dataset): ) # tokenize the dataset and save the tokens in .tokens attribute - print("[INFO] Tokenizing the dataset...") + if self.verbose: + print("[INFO] Tokenizing the dataset...") + self.dataset = self.dataset.map( tokenize_function, batched=batched, @@ -565,9 +590,10 @@ def tokenize_function(dataset): self.tokens = self.dataset.remove_columns(self._features) # save the tokenized text to `.df["tokens"] (does not include special tokens) - print( - "[INFO] Saving the tokenized text for each sentence into `.df['tokens']`..." - ) + if self.verbose: + print( + "[INFO] Saving the tokenized text for each sentence into `.df['tokens']`..." + ) cls_token_avail = self.tokenizer.cls_token is not None @@ -593,6 +619,7 @@ def tokenize_decoder(dataset): ) return {"tokens": tokens} + # token apply tokenize_decoder to dataset to obtain tokens self.dataset = self.dataset.map( tokenize_decoder, batched=batched, @@ -601,18 +628,23 @@ def tokenize_decoder(dataset): self.df["tokens"] = self.dataset["tokens"] # create new tokenized dataframe - print( - "[INFO] Creating tokenized dataframe and setting in `.tokenized_df` attribute..." - ) + if self.verbose: + print( + "[INFO] Creating tokenized dataframe and setting in `.tokenized_df` attribute..." + ) + self.tokenized_df = self.df.drop( columns=self.feature_name, errors="ignore", ).explode("tokens") self.tokenized_df = self.tokenized_df.reset_index() - print( - f"[INFO] Note: '{text_id_col_name}' is the " - "column name for denoting the corresponding text id" - ) + + if self.verbose: + print( + f"[INFO] Note: '{text_id_col_name}' is the " + "column name for denoting the corresponding text id" + ) + self.tokenized_df = self.tokenized_df.rename( columns={"index": text_id_col_name} ) @@ -1081,9 +1113,10 @@ def split_dataset( (if `valid_size` is not None), and test (`test`) Datasets. """ if self.dataset_split is not None: - print( - "[INFO] Dataset has already been split. " - "If required to split again, first set `.dataset_split` attribute to None" + warnings.warn( + "Dataset has already been split. If required to split again, first set " + "`.dataset_split` attribute to None", + stacklevel=3, ) return self.dataset_split @@ -1106,16 +1139,17 @@ def split_dataset( ) else: # indices are not provided, so split the dataset - if valid_size is None: - print( - "[INFO] Splitting up dataset into train / test sets, " - "and saving to `.dataset_split`." - ) - else: - print( - "[INFO] Splitting up dataset into train / validation / test sets, " - "and saving to `.dataset_split`." 
- ) + if self.verbose: + if valid_size is None: + print( + "[INFO] Splitting up dataset into train / test sets, " + "and saving to `.dataset_split`." + ) + else: + print( + "[INFO] Splitting up dataset into train / validation / test sets, " + "and saving to `.dataset_split`." + ) # first split data into train/valid set, test set train_test = self.dataset.train_test_split( @@ -1166,9 +1200,11 @@ def set_up_training_args(self, output_dir: str, **kwargs) -> TrainingArguments: TrainingArguments `TrainingArguments` object. """ - print( - "[INFO] Setting up TrainingArguments object and saving to `.training_args`." - ) + if self.verbose: + print( + "[INFO] Setting up TrainingArguments object and saving to `.training_args`." + ) + if kwargs is None: kwargs = {} if "evaluation_strategy" not in kwargs: @@ -1209,17 +1245,21 @@ def set_up_trainer( # check model, tokenizer and data_collator have been passed into the class self._check_model() - print("[INFO] Setting up Trainer object, and saving to `.trainer`.") + if self.verbose: + print("[INFO] Setting up Trainer object, and saving to `.trainer`.") + if self.training_args is None: raise NotImplementedError( "TrainingArgments have not been set in `.training_args`. " "Call `.set_up_training_args()` first." ) + if self.dataset_split is None: raise ValueError( "Dataset has not been split up into train / test (and validation) sets. " "Call `.split_dataset()` first." ) + if data_collator is None: # use the existing data collator data_collator = self.data_collator @@ -1286,6 +1326,13 @@ def fit_transformer_with_trainer_api( **trainer_args, ) - print(f"[INFO] Training model with {self.model.num_parameters()} parameters...") + if self.verbose: + print( + f"[INFO] Training model with {self.model.num_parameters()} parameters..." + ) + + # train model self.trainer.train() - print("[INFO] Training completed!") + + if self.verbose: + print("[INFO] Training completed!")
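Reviewer note: a minimal usage sketch of the new `verbose` flag and the `warnings`-based notices introduced above. The class names (`PrepareData`, `SentenceEncoder`) and the `original_df` / `embeddings` argument names are assumed from the file paths and the attributes referenced in the hunks, since they are not visible in the diff itself; treat the snippet as illustrative rather than as the package's documented API.

# Sketch only: names marked "assumed" are not confirmed by the diff above.
import warnings

import numpy as np
import pandas as pd

from nlpsig.data_preparation import PrepareData   # assumed class name / import path
from nlpsig.encode_text import SentenceEncoder    # assumed class name / import path

df = pd.DataFrame({"text": ["first post", "second post"], "id": [0, 0]})
embeddings = np.random.rand(2, 8)

# verbose=True (the default) keeps the existing "[INFO] ..." prints;
# verbose=False silences them without changing any behaviour.
prep = PrepareData(
    original_df=df,          # assumed parameter name
    embeddings=embeddings,   # assumed parameter name
    id_column="id",
    verbose=False,
)

# "Model is already loaded" notices are now emitted via `warnings.warn`
# rather than `print`, so callers can filter or escalate them as usual.
encoder = SentenceEncoder(
    df=df,
    feature_name="text",
    model_name="all-MiniLM-L6-v2",
    verbose=False,
)
encoder.load_pretrained_model()
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    encoder.load_pretrained_model()  # second call only warns; suppressed here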