diff --git a/genie/process_functions.py b/genie/process_functions.py index adea2f4f..d00787c7 100644 --- a/genie/process_functions.py +++ b/genie/process_functions.py @@ -980,3 +980,118 @@ def create_missing_columns(dataset: pd.DataFrame, schema: dict) -> pd.Series: elif data_type == "boolean": dataset[column] = dataset[column].astype(pd.BooleanDtype()) return dataset[list(schema.keys())] + + +def get_row_indices_for_invalid_column_values( + df: pd.DataFrame, + col: str, + possible_values: list, + na_allowed: bool = False, + sep: Optional[str] = None, +) -> pd.Index: + """This function checks the column values against possible_values and returns row indices of invalid rows. + + Args: + df (pd.DataFrame): Input dataframe + col (str): The column to be checked + possible_values (list): The list of possible values + na_allowed (bool, optional): If NA is allowed. Defaults to False. + sep (Optional[str], optional): The string separator. Defaults to None. + + Returns: + pd.Index: The row indices of the rows with values that are not in possible_values. + """ + if na_allowed: + # this is only useful for dropping NAs for individual values rather than value_list + check_values = df[col].dropna() + else: + check_values = df[col] + if sep: + # for columns contain lists of values + check_values = check_values.apply( + lambda x: all(substring in possible_values for substring in x.split(sep)) + ) + else: + check_values = check_values.apply(lambda x: x in possible_values) + return check_values[check_values == False].index + + +def get_message_for_invalid_column_value( + col: str, filename: str, invalid_indices: pd.Index, possible_values: list +) -> tuple: + """This function returns the error and warning messages if the target column has rows with invalid values. 
+ + Args: + col (str): The column to be checked + filename (str): The file name + invalid_indices (pd.Index): The row indices of the rows with invalid values + possible_values (list): The list of possible values + + Returns: + tuple: warning, error + """ + warning = "" + error = "" + # check the validity of values in the column + # concatenated possible values. This is done because of pandas typing. An integer column with one NA/blank value will be cast as a double. + possible_values = ", ".join( + [str(value).replace(".0", "") for value in possible_values] + ) + if len(invalid_indices) > 0: + error = ( + f"{filename}: Please double check your {col} column. Valid values are {possible_values}. " + f"You have {len(invalid_indices)} row(s) in your file where {col} column contains invalid values. " + f"The row(s) this occurs in are: {invalid_indices.tolist()}. Please correct.\n" + ) + return (warning, error) + + +def check_column_and_values_row_specific( + df: pd.DataFrame, + col: str, + possible_values: list, + filename: str, + na_allowed: bool = False, + required: bool = False, + sep: Optional[str] = None, +) -> tuple: + """This function checks if the column exists and checks if the values in the column have the valid values. + Currently, this function is only used in assay.py + + Args: + df (pd.DataFrame): Input dataframe + col (str): The column to be checked + possible_values (list): The list of possible values + filename (str): The file name + na_allowed (bool, optional): If NA is allowed. Defaults to False. + required (bool, optional): If the column is required. Defaults to False. + sep (Optional[str], optional): The string separator. Defaults to None. 
+ + Returns: + tuple: warning, error + """ + warning = "" + error = "" + # check the existence of the column + have_column = checkColExist(df, col) + if not have_column: + if required: + error = "{filename}: Must have {col} column.\n".format( + filename=filename, col=col + ) + else: + warning = ( + "{filename}: Doesn't have {col} column. " + "This column will be added.\n".format(filename=filename, col=col) + ) + else: + # get the row indices + invalid_indices = get_row_indices_for_invalid_column_values( + df, col, possible_values, na_allowed, sep + ) + # generate validation message + warning, error = get_message_for_invalid_column_value( + col, filename, invalid_indices, possible_values + ) + + return (warning, error) diff --git a/genie_registry/assay.py b/genie_registry/assay.py index 33362ddc..fcca0452 100644 --- a/genie_registry/assay.py +++ b/genie_registry/assay.py @@ -1,12 +1,11 @@ """Assay information class""" import os -import yaml import pandas as pd - -from genie.example_filetype_format import FileTypeFormat +import yaml from genie import extract, load, process_functions +from genie.example_filetype_format import FileTypeFormat class Assayinfo(FileTypeFormat): @@ -16,7 +15,7 @@ class Assayinfo(FileTypeFormat): _process_kwargs = ["newPath", "databaseSynId"] - _validation_kwargs = ["project_id"] + # _validation_kwargs = ["project_id"] def _validateFilename(self, filepath_list): """Validate assay information filename""" @@ -128,7 +127,7 @@ def _get_dataframe(self, filepath_list): all_panel_info = pd.concat([all_panel_info, assay_finaldf]) return all_panel_info - def _validate(self, assay_info_df, project_id): + def _validate(self, assay_info_df): """ Validates the values of assay information file @@ -202,7 +201,7 @@ def _validate(self, assay_info_df, project_id): warn, error = process_functions.check_col_and_values( assay_info_df, "library_strategy", - read_group_headers["library_strategy"]["enum"], + ["Targeted Sequencing", "WXS"], 
filename="Assay_information.yaml", required=True, ) @@ -231,16 +230,6 @@ def _validate(self, assay_info_df, project_id): warning += warn total_error += error - # target_capture_kit = read_group_headers['target_capture_kit']['enum'] - # warn, error = process_functions.check_col_and_values( - # assay_info_df, - # 'target_capture_kit', - # target_capture_kit, - # filename="Assay_information.yaml", - # required=True) - # warning += warn - # total_error += error - if not process_functions.checkColExist(assay_info_df, "target_capture_kit"): total_error += ( "Assay_information.yaml: " "Must have target_capture_kit column.\n" diff --git a/tests/test_assay.py b/tests/test_assay.py index 691c693e..dc186621 100644 --- a/tests/test_assay.py +++ b/tests/test_assay.py @@ -5,9 +5,8 @@ import pandas as pd import pytest - -from genie_registry.assay import Assayinfo from genie import extract, process_functions +from genie_registry.assay import Assayinfo GDC_DATA_DICT = { "properties": { @@ -45,7 +44,7 @@ def test_validinput__validate(assay_info): assay_info_dict = { "SEQ_ASSAY_ID": ["SAGE-1", "SAGE-3"], "is_paired_end": [True, False], - "library_strategy": ["value1", "value2"], + "library_strategy": ["Targeted Sequencing", "WXS"], "library_selection": ["value1", "value2"], "platform": ["value1", "value2"], "instrument_model": ["value1", "value2"], @@ -68,18 +67,18 @@ def test_validinput__validate(assay_info): ), patch.object( process_functions, "get_gdc_data_dictionary", return_value=test_dict ) as patch_get_gdc: - error, warning = assay_info._validate(assay_info_df, "syn9999") + error, warning = assay_info._validate(assay_info_df) assert error == "" assert warning == "" patch_get_gdc.assert_called() def test_case__validate(assay_info): - """Valid input should have no errors or warnings""" + """Valid input with lowercase SEQ_ASSAY_ID, should have no errors or warnings""" assay_info_dict = { "SEQ_ASSAY_ID": ["sage-1", "SAGE-3"], "is_paired_end": [True, False], - "library_strategy": 
["value1", "value2"], + "library_strategy": ["Targeted Sequencing", "WXS"], "library_selection": ["value1", "value2"], "platform": ["value1", "value2"], "instrument_model": ["value1", "value2"], @@ -102,18 +101,18 @@ def test_case__validate(assay_info): ), patch.object( process_functions, "get_gdc_data_dictionary", return_value=test_dict ) as patch_get_gdc: - error, warning = assay_info._validate(assay_info_df, "syn9999") + error, warning = assay_info._validate(assay_info_df) assert error == "" assert warning == "" patch_get_gdc.assert_called() def test_underscore__validate(assay_info): - """Valid input should have no errors or warnings""" + """Valid input with underscore in SEQ_ASSAY_ID, should have no errors or warnings""" assay_info_dict = { "SEQ_ASSAY_ID": ["SAGE_1", "SAGE-3"], "is_paired_end": [True, False], - "library_strategy": ["value1", "value2"], + "library_strategy": ["Targeted Sequencing", "WXS"], "library_selection": ["value1", "value2"], "platform": ["value1", "value2"], "instrument_model": ["value1", "value2"], @@ -136,7 +135,7 @@ def test_underscore__validate(assay_info): ), patch.object( process_functions, "get_gdc_data_dictionary", return_value=test_dict ) as patch_get_gdc: - error, warning = assay_info._validate(assay_info_df, "syn9999") + error, warning = assay_info._validate(assay_info_df) assert error == "" assert warning == "" patch_get_gdc.assert_called() @@ -149,7 +148,7 @@ def test__missingcols__validate(assay_info): with patch.object( process_functions, "get_gdc_data_dictionary", return_value=test_dict ) as patch_get_gdc: - error, warning = assay_info._validate(assay_info_df, "syn99999") + error, warning = assay_info._validate(assay_info_df) expected_errors = ( "Assay_information.yaml: Must have SEQ_ASSAY_ID column.\n" "Assay_information.yaml: Must have is_paired_end column.\n" @@ -230,7 +229,7 @@ def test_invalid__validate(assay_info): assay_info_dict = { "SEQ_ASSAY_ID": ["SAGE-1", "SAG-2"], "is_paired_end": [True, "foo"], - 
"library_strategy": ["foo", "ChIP-Seq"], + "library_strategy": ["foo", "WXS"], "library_selection": ["foo", "PCR"], "platform": ["foo", "Illumina"], "instrument_model": ["foo", "Illumina HiSeq 4000"], @@ -256,7 +255,7 @@ def test_invalid__validate(assay_info): ), patch.object( process_functions, "get_gdc_data_dictionary", return_value=test_dict ) as patch_get_gdc: - error, warning = assay_info._validate(assay_info_df, "syn9999") + error, warning = assay_info._validate(assay_info_df) expected_errors = ( "Assay_information.yaml: " "Please make sure all your SEQ_ASSAY_IDs start with your " @@ -270,7 +269,7 @@ def test_invalid__validate(assay_info): "This column must only be these values: value1, value2\n" "Assay_information.yaml: " "Please double check your library_strategy column. " - "This column must only be these values: value1, value2\n" + "This column must only be these values: Targeted Sequencing, WXS\n" "Assay_information.yaml: " "Please double check your platform column. " "This column must only be these values: value1, value2\n" diff --git a/tests/test_process_functions.py b/tests/test_process_functions.py index 72f72663..5e918182 100644 --- a/tests/test_process_functions.py +++ b/tests/test_process_functions.py @@ -1,7 +1,10 @@ -from unittest.mock import Mock, patch import uuid +from unittest.mock import Mock, patch import pandas as pd +import pytest +import synapseclient +from genie import process_functions from pandas.api.types import ( is_bool_dtype, is_float_dtype, @@ -9,10 +12,6 @@ is_string_dtype, ) from pandas.testing import assert_frame_equal -import pytest -import synapseclient - -from genie import process_functions DATABASE_DF = pd.DataFrame( { @@ -167,7 +166,7 @@ def test_append__append_rows(): append_rows = process_functions._append_rows(new_datadf, DATABASE_DF, "UNIQUE_KEY") append_rows.fillna("", inplace=True) expecteddf.fillna("", inplace=True) - assert append_rows.equals(expecteddf[append_rows.columns]) + assert_frame_equal(append_rows, 
expecteddf[append_rows.columns], check_dtype=False) def test___create_update_rowsdf(): @@ -626,7 +625,7 @@ def get_create_missing_columns_test_cases(): "name": "empty_df", "test_input": pd.DataFrame({}), "test_schema": {"col1": "float"}, - "expected_output": pd.DataFrame({"col1": []}, index=[]), + "expected_output": pd.DataFrame({"col1": []}, dtype=float), "expected_dtype": is_float_dtype, "expected_na_count": 0, }, @@ -660,7 +659,8 @@ def test_that_create_missing_columns_gets_expected_output_with_single_col_df( result = process_functions.create_missing_columns( dataset=test_cases["test_input"], schema=test_cases["test_schema"] ) - assert_frame_equal(result, test_cases["expected_output"], check_exact=True) + result.reset_index(drop=True, inplace=True) + assert_frame_equal(result, test_cases["expected_output"], check_dtype=False) assert test_cases["expected_dtype"](result.iloc[:, 0]) assert result.isna().sum().sum() == test_cases["expected_na_count"] @@ -715,3 +715,246 @@ def test_that_create_missing_columns_returns_expected_output_with_multi_col_df() assert result.isna().sum().sum() == 11 assert_frame_equal(result, expected_output, check_exact=True) + + +def get_row_indices_for_invalid_column_values_test_cases(): + return [ + { + "name": "has_na_and_allowed", + "df": pd.DataFrame({"test_col": ["Val1", "Val2", float("nan"), None]}), + "col": "test_col", + "possible_values": ["Val1"], + "na_allowed": True, + "sep": None, + "expected_index": pd.Index([1]), + }, + { + "name": "has_na_but_not_allowed", + "df": pd.DataFrame({"test_col": ["Val1", "Val2", float("nan"), None]}), + "col": "test_col", + "possible_values": ["Val1"], + "na_allowed": False, + "sep": None, + "expected_index": pd.Index([1, 2, 3]), + }, + { + "name": "invalid_values_na_allowed", + "df": pd.DataFrame({"test_col": ["val1", "VAL1", float("nan"), None]}), + "col": "test_col", + "possible_values": ["Val1"], + "na_allowed": True, + "sep": None, + "expected_index": pd.Index([0, 1]), + }, + { + "name": 
"invalid_values_na_not_allowed", + "df": pd.DataFrame({"test_col": ["val1", "VAL1", float("nan"), None]}), + "col": "test_col", + "possible_values": ["Val1"], + "na_allowed": False, + "sep": None, + "expected_index": pd.Index([0, 1, 2, 3]), + }, + { + "name": "values_in_list", + "df": pd.DataFrame( + { + "test_col": [ + "Val1;Val2", + "Val1;Val2;Val3", + "Val1", + "Val1;", + "Val1;None", + ] + } + ), + "col": "test_col", + "possible_values": ["Val1", "Val2"], + "na_allowed": True, + "sep": ";", + "expected_index": pd.Index([1, 3, 4]), + }, + { + "name": "valid_data", + "df": pd.DataFrame({"test_col": ["Val1", "Val2", "Val1;Val2"]}), + "col": "test_col", + "possible_values": ["Val1", "Val2"], + "na_allowed": False, + "sep": ";", + "expected_index": pd.Index([]), + }, + ] + + +@pytest.mark.parametrize( + "test_cases", + get_row_indices_for_invalid_column_values_test_cases(), + ids=lambda x: x["name"], +) +def test_get_row_indices_for_invalid_column_values(test_cases): + df = test_cases["df"] + col = test_cases["col"] + possible_values = test_cases["possible_values"] + na_allowed = test_cases["na_allowed"] + sep = test_cases["sep"] + results = process_functions.get_row_indices_for_invalid_column_values( + df, col, possible_values, na_allowed, sep + ) + assert results.equals(test_cases["expected_index"]) + + +def get_message_for_invalid_column_value_test_cases(): + return [ + { + "name": "invalid_data", + "col": "test_col", + "filename": "test_filename", + "invalid_indices": pd.Index([1, 2, 3]), + "possible_values": ["Val1"], + "expected_error": "test_filename: Please double check your test_col column. Valid values are Val1. " + "You have 3 row(s) in your file where test_col column contains invalid values. " + "The row(s) this occurs in are: [1, 2, 3]. 
Please correct.\n", + "expected_warning": "", + }, + { + "name": "valid_data", + "col": "test_col", + "filename": "test_filename", + "invalid_indices": pd.Index([]), + "possible_values": ["Val1", "Val2"], + "expected_error": "", + "expected_warning": "", + }, + ] + + +@pytest.mark.parametrize( + "test_cases", + get_message_for_invalid_column_value_test_cases(), + ids=lambda x: x["name"], +) +def test_get_message_for_invalid_column_value(test_cases): + col = test_cases["col"] + filename = test_cases["filename"] + invalid_indices = test_cases["invalid_indices"] + possible_values = test_cases["possible_values"] + warning, error = process_functions.get_message_for_invalid_column_value( + col, filename, invalid_indices, possible_values + ) + assert warning == test_cases["expected_warning"] + assert error == test_cases["expected_error"] + + +def check_col_and_values_row_specific_test_cases(): + return [ + { + "name": "valid_data_with_value_list", + "df": pd.DataFrame({"test_col": ["Val1", "Val2", "Val1;Val2"]}), + "col": "test_col", + "possible_values": ["Val1", "Val2"], + "filename": "test_filename", + "na_allowed": True, + "required": True, + "sep": ";", + "expected_error": "", + "expected_warning": "", + }, + { + "name": "valid_data_with_individual_value_na_allowed", + "df": pd.DataFrame({"test_col": ["Val1", "Val2", float("nan"), None]}), + "col": "test_col", + "possible_values": ["Val1", "Val2"], + "filename": "test_filename", + "na_allowed": True, + "required": True, + "sep": ";", + "expected_error": "", + "expected_warning": "", + }, + { + "name": "missing_required_column", + "df": pd.DataFrame({"test_col": ["Val1", "Val2", "Val1;Val2"]}), + "col": "test_col1", + "possible_values": ["Val1"], + "filename": "test_filename", + "na_allowed": True, + "required": True, + "sep": ";", + "expected_error": "test_filename: Must have test_col1 column.\n", + "expected_warning": "", + }, + { + "name": "missing_optional_column", + "df": pd.DataFrame({"test_col": ["Val1", "Val2", 
"Val1;Val2"]}), + "col": "test_col1", + "possible_values": ["Val1"], + "filename": "test_filename", + "na_allowed": True, + "required": False, + "sep": ";", + "expected_error": "", + "expected_warning": "test_filename: Doesn't have test_col1 column. This column will be added.\n", + }, + { + "name": "invalid_data_with_value_list", + "df": pd.DataFrame({"test_col": ["Val1", "Val2", "Val1;Val2"]}), + "col": "test_col", + "possible_values": ["Val1"], + "filename": "test_filename", + "na_allowed": True, + "required": True, + "sep": ";", + "expected_error": "test_filename: Please double check your test_col column. Valid values are Val1. " + "You have 2 row(s) in your file where test_col column contains invalid values. " + "The row(s) this occurs in are: [1, 2]. Please correct.\n", + "expected_warning": "", + }, + { + "name": "invalid_data_with_individual_value_na_not_allowed", + "df": pd.DataFrame({"test_col": ["Val1", "Val2", "", float("nan"), None]}), + "col": "test_col", + "possible_values": ["Val1", "Val2"], + "filename": "test_filename", + "na_allowed": False, + "required": True, + "sep": None, + "expected_error": "test_filename: Please double check your test_col column. Valid values are Val1, Val2. " + "You have 3 row(s) in your file where test_col column contains invalid values. " + "The row(s) this occurs in are: [2, 3, 4]. Please correct.\n", + "expected_warning": "", + }, + { + "name": "invalid_data_with_individual_value_na_allowed", + "df": pd.DataFrame({"test_col": ["Val1", "Val2", "", float("nan"), None]}), + "col": "test_col", + "possible_values": ["Val1"], + "filename": "test_filename", + "na_allowed": True, + "required": True, + "sep": None, + "expected_error": "test_filename: Please double check your test_col column. Valid values are Val1. " + "You have 2 row(s) in your file where test_col column contains invalid values. " + "The row(s) this occurs in are: [1, 2]. 
Please correct.\n", + "expected_warning": "", + }, + ] + + +@pytest.mark.parametrize( + "test_cases", + check_col_and_values_row_specific_test_cases(), + ids=lambda x: x["name"], +) +def test_check_col_and_values_row_specific(test_cases): + df = test_cases["df"] + col = test_cases["col"] + possible_values = test_cases["possible_values"] + filename = test_cases["filename"] + na_allowed = test_cases["na_allowed"] + required = test_cases["required"] + sep = test_cases["sep"] + warning, error = process_functions.check_column_and_values_row_specific( + df, col, possible_values, filename, na_allowed, required, sep + ) + assert warning == test_cases["expected_warning"] + assert error == test_cases["expected_error"]