Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add extract_readable_strings function with advanced filtering support and reorganize string extraction code #2720

Open
wants to merge 1 commit into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/include/ndpi_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1142,6 +1142,25 @@ extern "C" {
/**
 * Scan a buffer for a run of human-readable characters.
 *
 * @param buffer               Data to scan.
 * @param buffer_size          Number of bytes available in buffer.
 * @param min_string_match_len A run is reported only when it is strictly longer than this value.
 * @param outbuf               Receives the characters of the run being examined.
 * @param outbuf_len           Size of outbuf; the last byte is reserved for a '\0' terminator.
 *
 * @return 1 if a run longer than min_string_match_len has been found, 0 otherwise.
 */
int ndpi_has_human_readeable_string(char *buffer, u_int buffer_size,
u_int8_t min_string_match_len, /* Returns 0 if no string longer than min_string_match_len has been found */
char *outbuf, u_int outbuf_len);

/**
 * @struct string_list_t
 * @brief Represents a dynamic list of strings.
 *
 * This structure manages a dynamic array of strings, keeping track of the number of
 * stored items and the allocated capacity.
 *
 * NOTE(review): public nDPI symbols are expected to carry the ndpi_ prefix
 * (i.e. ndpi_string_list_t) -- rename requested by the maintainers.
 */
typedef struct {
char **items; ///< Array of string pointers
size_t count; ///< Number of strings currently stored in items
size_t capacity; ///< Number of slots allocated for items
} string_list_t;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use ndpi_ as prefix for all public functions and structures.
Here ndpi_string_list_t


/* Presumably releases a string_list_t and the strings it holds -- implementation not visible here, confirm ownership rules.
   NOTE(review): maintainers asked for the ndpi_ prefix on this public function. */
void string_list_free(string_list_t *list);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here...


string_list_t* extract_readable_strings(const unsigned char *buffer, size_t buffer_len,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... and here

size_t min_len, size_t list_limit, bool (*filter_func)(char *));

/* Returns a summarized flow info string. Only works for DNS/HTTP/TLS/QUIC. */
const char* ndpi_get_flow_info(struct ndpi_flow_struct const * const flow,
ndpi_protocol const * const l7_protocol);
Expand Down
112 changes: 0 additions & 112 deletions src/lib/ndpi_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -670,118 +670,6 @@ const char* ndpi_cipher2str(u_int32_t cipher, char unknown_cipher[8]) {
}
}

/* ******************************************************************** */

/*
 * Tell whether c is one of the extra non-alphanumeric characters
 * accepted inside readable strings: '.', ' ', '@' or '/'.
 * Returns 1 for those four characters, 0 for anything else.
 */
static inline int ndpi_is_other_char(char c) {
  switch(c) {
  case '.':
  case ' ':
  case '@':
  case '/':
    return(1);

  default:
    return(0);
  }
}

/* ******************************************************************** */

/*
 * Slow-path classifier used to fill the lookup table.
 * A character is valid (1) when it is a digit, a letter, or one of the
 * characters accepted by ndpi_is_other_char(); any other punctuation,
 * and everything else, is rejected (0).
 */
static int _ndpi_is_valid_char(char c) {
  /* The allowed extra characters are accepted regardless of being punctuation */
  if(ndpi_is_other_char(c))
    return(1);

  /* Any remaining punctuation is rejected */
  if(ndpi_ispunct(c))
    return(0);

  return(ndpi_isdigit(c) || ndpi_isalpha(c));
}
/* Per-byte cache of _ndpi_is_valid_char() results, filled on first use */
static char ndpi_is_valid_char_tbl[256];
/* Becomes 1 once ndpi_is_valid_char_tbl has been populated */
static char ndpi_is_valid_char_tbl_init = 0;

/* Populate all 256 table entries and mark the table as ready. */
static void _ndpi_is_valid_char_init(void) {
  int idx = 0;

  while(idx < 256) {
    ndpi_is_valid_char_tbl[idx] = _ndpi_is_valid_char(idx);
    idx++;
  }

  ndpi_is_valid_char_tbl_init = 1;
}
/*
 * Table-driven validity check for a single character.
 * The lookup table is built on the first call; the character is used
 * as an unsigned index so that values above 127 map to entries 128..255.
 */
static inline int ndpi_is_valid_char(char c) {
  const unsigned char slot = (unsigned char)c;

  if(ndpi_is_valid_char_tbl_init == 0)
    _ndpi_is_valid_char_init();

  return ndpi_is_valid_char_tbl[slot];
}

/* ******************************************************************** */

/*
 * Classify the two-character sequence starting at str.
 *
 * Returns 1 straight away when both characters are digits, or when either
 * of them is one of the characters accepted by ndpi_is_other_char().
 * Otherwise the pair is lower-cased and the result of ndpi_match_bigram()
 * on it is returned.
 *
 * str must point to at least two readable characters.
 */
static int ndpi_find_non_eng_bigrams(char *str) {
  char s[3];

  if((ndpi_isdigit(str[0]) && ndpi_isdigit(str[1]))
     || ndpi_is_other_char(str[0])
     || ndpi_is_other_char(str[1])
     )
    return(1);

  /* tolower() requires a value representable as unsigned char (or EOF):
     passing a negative plain char is undefined behaviour */
  s[0] = (char)tolower((unsigned char)str[0]);
  s[1] = (char)tolower((unsigned char)str[1]);
  s[2] = '\0';

  return(ndpi_match_bigram(s));
}

/* ******************************************************************** */

/* #define PRINT_STRINGS 1 */

/*
 * Scan buffer for the first run of readable characters that is strictly
 * longer than min_string_match_len.
 *
 * A run starts on a pair of valid characters accepted by
 * ndpi_find_non_eng_bigrams() and ends on the first position where no such
 * pair starts. The characters of the run are copied into outbuf (truncated
 * to outbuf_len - 1 bytes).
 *
 * buffer               Data to scan
 * buffer_size          Number of bytes in buffer; the last two bytes are never
 *                      used as the start of a pair
 * min_string_match_len Minimum length (exclusive) for a run to be reported
 * outbuf               Output buffer; always '\0' terminated on return. It holds
 *                      the matching run on success and an empty string otherwise
 * outbuf_len           Size of outbuf in bytes
 *
 * Returns 1 when a run has been found, 0 otherwise (including invalid
 * arguments). A run still open when the scan ends is not evaluated.
 */
int ndpi_has_human_readeable_string(char *buffer, u_int buffer_size,
				    u_int8_t min_string_match_len,
				    char *outbuf, u_int outbuf_len) {
  u_int ret = 0, i, do_cr = 0, len = 0, o_idx = 0;

  /* Without an output buffer nothing can be reported; this also prevents
     the unsigned wrap-around of outbuf_len-- below */
  if((!buffer) || (!outbuf) || (outbuf_len == 0))
    return(0);

  outbuf_len--; /* Keep the last byte for the terminator */
  outbuf[outbuf_len] = '\0';
  outbuf[0] = '\0';

  /* buffer_size is unsigned: with fewer than 3 bytes (buffer_size - 2)
     would either be 0 or wrap around and cause out-of-bounds reads */
  if(buffer_size < 3)
    return(0);

  for(i=0; i<buffer_size-2; i++) {
    if(ndpi_is_valid_char(buffer[i])
       && ndpi_is_valid_char(buffer[i+1])
       && ndpi_find_non_eng_bigrams(&buffer[i])) {
#ifdef PRINT_STRINGS
      printf("%c%c", buffer[i], buffer[i+1]);
#endif
      if(o_idx < outbuf_len) outbuf[o_idx++] = buffer[i];
      if(o_idx < outbuf_len) outbuf[o_idx++] = buffer[i+1];
      do_cr = 1, i += 1, len += 2; /* Both characters of the pair are consumed */
    } else {
      if(ndpi_is_valid_char(buffer[i]) && do_cr) {
	/* Trailing valid character that still belongs to the current run */
#ifdef PRINT_STRINGS
	printf("%c", buffer[i]);
#endif
	if(o_idx < outbuf_len) outbuf[o_idx++] = buffer[i];
	len += 1;
      }

      if(do_cr) {
	/* The current run ends here: keep it or discard it */
	if(len > min_string_match_len)
	  ret = 1;
	else {
	  o_idx = 0;
	  outbuf[o_idx] = '\0';
	}

#ifdef PRINT_STRINGS
	printf(" [len: %u]%s\n", len, ret ? "<-- HIT" : "");
#endif

	if(ret)
	  break;

	do_cr = 0, len = 0;
      }
    }
  }

  /* Terminate right after the reported run (o_idx never exceeds the
     decremented outbuf_len) or hand back an empty string when nothing matched */
  if(!ret)
    o_idx = 0;

  outbuf[o_idx] = '\0';

#ifdef PRINT_STRINGS
  printf("=======>> Found string: %u\n", ret);
#endif

  return(ret);
}

/* ********************************** */

static const char* ndpi_get_flow_info_by_proto_id(struct ndpi_flow_struct const * const flow,
Expand Down
Loading
Loading