Add a heuristic to detect encrypted/obfuscated OpenVPN flows (#2547)

Based on the paper "OpenVPN is Open to VPN Fingerprinting". See: https://www.usenix.org/conference/usenixsecurity22/presentation/xue-diwen

Basic idea:
* the distribution of the first byte of the messages (i.e. the distribution of the op-codes) is quite unique
* this fingerprint might still be detectable even if the OpenVPN packets are somehow fully encrypted/obfuscated

The heuristic is disabled by default.
ntop · Sep 16, 2024 · 0ddbda1 · 0ddbda1
1 parent 47ea30f
commit 0ddbda1
Show file tree

Hide file tree

Showing 13 changed files with 396 additions and 32 deletions.
diff --git a/doc/configuration_parameters.md b/doc/configuration_parameters.md
@@ -49,6 +49,8 @@ TODO
 | "ookla"      | "dpi.aggressiveness",                     | 0x01          | 0x00      | 0x01      | Detection aggressiveness for Ookla. The value is a bitmask. Values: 0x0 = disabled; 0x01 = enable heuristic for detection over TLS (via Ookla LRU cache) |
 | "zoom"       | "max_packets_extra_dissection"            | 4             | 0         | 255       | After a flow has been classified as Zoom, nDPI might analyse more packets to look for a sub-classification or for metadata. This parameter sets the upper limit on the number of these packets  |
 | "rtp"        | "search_for_stun"                         | disable       | NULL      | NULL      | After a flow has been classified as RTP or RTCP, nDPI might analyse more packets to look for STUN/DTLS packets, i.e. to try to tell if this flow is a "pure" RTP/RTCP flow or if the RTP/RTCP packets are multiplexed with STUN/DTLS. Useful for proper (sub)classification when the beginning of the flow is not captured or if there are lost packets in the captured traffic. If enabled, nDPI requires more packets to process for each RTP/RTCP flow. |
+| "openvpn"    | "dpi.heuristics",                         | 0x00          | 0         | 0x01      | Enable/disable some heuristics to better detect OpenVPN. The value is a bitmask. Values: 0x0 = disabled; 0x01 = enable heuristic based on op-code frequency. If enabled, some false positives are expected. See: https://www.usenix.org/conference/usenixsecurity22/presentation/xue-diwen |
+| "openvpn"    | "dpi.heuristics.num_messages",            | 10            | 0         | 255       | If at least one OpenVPN heuristic is enabled (see `openvpn,"dpi.heuristics"`), this parameter sets the maximum number of OpenVPN messages required for each flow. Note that an OpenVPN message may be split into multiple (TCP/UDP) packets and that a (TCP/UDP) packet may contain multiple OpenVPN messages. The higher the value, the lower the false positive rate, but the more packets nDPI has to process. |
 | "openvpn"    | "subclassification_by_ip"                 | enable        | NULL      | NULL      | Enable/disable sub-classification of OpenVPN flows using server IP. Useful to detect the specific VPN application/app. At the moment, this knob allows to identify: Mullvad, NordVPN, ProtonVPN. |
 | "wireguard"  | "subclassification_by_ip"                 | enable        | NULL      | NULL      | Enable/disable sub-classification of Wireguard flows using server IP. Useful to detect the specific VPN application/app. At the moment, this knob allows to identify: Mullvad, NordVPN, ProtonVPN. |
 | $PROTO_NAME  | "log"                                     | disable       | NULL      | NULL      | Enable/disable logging/debug for specific protocol. Use "any" as protocol name if you want to easily enable/disable logging/debug for all protocols |

diff --git a/fuzz/fuzz_config.cpp b/fuzz/fuzz_config.cpp
@@ -239,6 +239,14 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
     snprintf(cfg_value, sizeof(cfg_value), "%d", value);
     ndpi_set_config(ndpi_info_mod, "rtp", "search_for_stun", cfg_value);
   }
+  if(fuzzed_data.ConsumeBool()) {
+    value = fuzzed_data.ConsumeIntegralInRange(0, 0x01 + 1);
+    snprintf(cfg_value, sizeof(cfg_value), "%d", value);
+    ndpi_set_config(ndpi_info_mod, "openvpn", "dpi.heuristics", cfg_value);
+    value = fuzzed_data.ConsumeIntegralInRange(0, 255 + 1);
+    snprintf(cfg_value, sizeof(cfg_value), "%d", value);
+    ndpi_set_config(ndpi_info_mod, "openvpn", "dpi.heuristics.num_messages", cfg_value);
+  }
   if(fuzzed_data.ConsumeBool()) {
     value = fuzzed_data.ConsumeIntegralInRange(0, 0x01 + 1);
     snprintf(cfg_value, sizeof(cfg_value), "%d", value);

diff --git a/fuzz/fuzz_ndpi_reader.c b/fuzz/fuzz_ndpi_reader.c
@@ -89,6 +89,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
     ndpi_set_config(workflow->ndpi_struct, "stun", "max_packets_extra_dissection", "255");
     ndpi_set_config(workflow->ndpi_struct, "zoom", "max_packets_extra_dissection", "255");
     ndpi_set_config(workflow->ndpi_struct, "rtp", "search_for_stun", "1");
+    ndpi_set_config(workflow->ndpi_struct, "openvpn", "dpi.heuristics", "0x01");
+    ndpi_set_config(workflow->ndpi_struct, "openvpn", "dpi.heuristics.num_messages", "255");
 
     ndpi_finalize_initialization(workflow->ndpi_struct);
 

diff --git a/src/include/ndpi_private.h b/src/include/ndpi_private.h
@@ -269,6 +269,8 @@ struct ndpi_detection_module_config_struct {
 
   int rtp_search_for_stun;
 
+  int openvpn_heuristics;
+  int openvpn_heuristics_num_msgs;
   int openvpn_subclassification_by_ip;
 
   int wireguard_subclassification_by_ip;
@@ -609,6 +611,8 @@ u_int ndpi_search_tcp_or_udp_raw(struct ndpi_detection_module_struct *ndpi_struc
 
 char* ndpi_intoav4(unsigned int addr, char* buf, u_int16_t bufLen);
 
+int is_flow_addr_informative(const struct ndpi_flow_struct *flow);
+
 u_int16_t icmp4_checksum(u_int8_t const * const buf, size_t len);
 
 ndpi_risk_enum ndpi_network_risk_ptree_match(struct ndpi_detection_module_struct *ndpi_str,

diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
@@ -169,7 +169,8 @@ typedef enum {
   NDPI_MALWARE_HOST_CONTACTED, /* Flow client contacted a malware host */
   NDPI_BINARY_DATA_TRANSFER,   /* Attempt to transfer something in binary format */
   NDPI_PROBING_ATTEMPT,        /* Probing attempt (e.g. TCP connection with no data exchanged or unidirection traffic for bidirectional flows such as SSH) */
-
+  NDPI_OBFUSCATED_TRAFFIC,
+
   /* Leave this as last member */
   NDPI_MAX_RISK /* must be <= 63 due to (**) */
 } ndpi_risk_enum;
@@ -791,6 +792,10 @@ struct ndpi_lru_cache {
 /* Ookla */
 #define NDPI_AGGRESSIVENESS_OOKLA_TLS			0x01 /* Enable detection over TLS (using ookla cache) */
 
+/* OpenVPN */
+#define NDPI_HEURISTICS_OPENVPN_OPCODE			0x01 /* Enable heuristic based on opcode frequency */
+
+
 /* ************************************************** */
 
 struct ndpi_flow_tcp_struct {
@@ -1520,6 +1525,14 @@ struct ndpi_flow_struct {
 
   /* NDPI_PROTOCOL_OPENVPN */
   u_int8_t ovpn_session_id[2][8];
+  u_int8_t ovpn_alg_standard_state : 2;
+  u_int8_t ovpn_alg_heur_opcode_state : 2;
+  u_int8_t ovpn_heur_opcode__codes_num : 4;
+  u_int8_t ovpn_heur_opcode__num_msgs;
+#define OPENVPN_HEUR_MAX_NUM_OPCODES 4
+  u_int8_t ovpn_heur_opcode__codes[OPENVPN_HEUR_MAX_NUM_OPCODES];
+  u_int8_t ovpn_heur_opcode__resets[2];
+  u_int16_t ovpn_heur_opcode__missing_bytes[2];
 
   /* NDPI_PROTOCOL_TINC */
   u_int8_t tinc_state;
@@ -1549,8 +1562,8 @@ struct ndpi_flow_struct {
 _Static_assert(sizeof(((struct ndpi_flow_struct *)0)->protos) <= 264,
                "Size of the struct member protocols increased to more than 264 bytes, "
                "please check if this change is necessary.");
-_Static_assert(sizeof(struct ndpi_flow_struct) <= 1136,
-               "Size of the flow struct increased to more than 1136 bytes, "
+_Static_assert(sizeof(struct ndpi_flow_struct) <= 1152,
+               "Size of the flow struct increased to more than 1152 bytes, "
                "please check if this change is necessary.");
 #endif
 #endif

diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
@@ -198,6 +198,7 @@ static ndpi_risk_info ndpi_known_risks[] = {
   { NDPI_MALWARE_HOST_CONTACTED,                NDPI_RISK_SEVERE, CLIENT_HIGH_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE },
   { NDPI_BINARY_DATA_TRANSFER,                  NDPI_RISK_MEDIUM, CLIENT_FAIR_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE },
   { NDPI_PROBING_ATTEMPT,                       NDPI_RISK_MEDIUM, CLIENT_FAIR_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE },
+  { NDPI_OBFUSCATED_TRAFFIC,                    NDPI_RISK_HIGH, CLIENT_HIGH_RISK_PERCENTAGE, NDPI_BOTH_ACCOUNTABLE },
 
   /* Leave this as last member */
   { NDPI_MAX_RISK,                              NDPI_RISK_LOW,    CLIENT_FAIR_RISK_PERCENTAGE, NDPI_NO_ACCOUNTABILITY   }
@@ -438,6 +439,38 @@ void ndpi_set_proto_category(struct ndpi_detection_module_struct *ndpi_str, u_in
 
 /* ********************************************************************************** */
 
+int is_flow_addr_informative(const struct ndpi_flow_struct *flow)
+{
+  /* The idea is to tell if the address itself carries some useful information or not,
+     judging only from flow->guessed_protocol_id_by_ip.
+     Examples:
+      a flow to a Facebook address is quite likely related to some Facebook apps
+      a flow to an AWS address might be potentially anything
+     Returns 0 (not informative) when the guessed protocol is unknown, one of the
+     cloud providers or one of the VPNs listed below; returns 1 (informative)
+     for any other value.
+  */
+
+  switch(flow->guessed_protocol_id_by_ip) {
+  case NDPI_PROTOCOL_UNKNOWN:
+  /* This is basically the list of cloud providers supported by nDPI */
+  case NDPI_PROTOCOL_TENCENT:
+  case NDPI_PROTOCOL_EDGECAST:
+  case NDPI_PROTOCOL_ALIBABA:
+  case NDPI_PROTOCOL_YANDEX_CLOUD:
+  case NDPI_PROTOCOL_AMAZON_AWS:
+  case NDPI_PROTOCOL_MICROSOFT_AZURE:
+  case NDPI_PROTOCOL_CACHEFLY:
+  case NDPI_PROTOCOL_CLOUDFLARE:
+  case NDPI_PROTOCOL_GOOGLE_CLOUD:
+    return 0;
+  /* This is basically the list of VPNs (with **entry** addresses) supported by nDPI */
+  case NDPI_PROTOCOL_NORDVPN:
+  case NDPI_PROTOCOL_PROTONVPN:
+    return 0;
+  default:
+    return 1;
+  }
+}
+
+/* ********************************************************************************** */
+
 /*
   There are some (master) protocols that are informative, meaning that it shows
   what is the subprotocol about, but also that the subprotocol isn't a real protocol.
@@ -11439,6 +11472,8 @@ static const struct cfg_param {
 
   { "rtp",           "search_for_stun",                         "disable", NULL, NULL, CFG_PARAM_ENABLE_DISABLE, __OFF(rtp_search_for_stun), NULL },
 
+  { "openvpn",       "dpi.heuristics",                          "0x00", "0", "0x01", CFG_PARAM_INT, __OFF(openvpn_heuristics), NULL },
+  { "openvpn",       "dpi.heuristics.num_messages",             "10", "0", "255", CFG_PARAM_INT, __OFF(openvpn_heuristics_num_msgs), NULL },
   { "openvpn",       "subclassification_by_ip",                 "enable", NULL, NULL, CFG_PARAM_ENABLE_DISABLE, __OFF(openvpn_subclassification_by_ip), NULL },
 
   { "wireguard",     "subclassification_by_ip",                 "enable", NULL, NULL, CFG_PARAM_ENABLE_DISABLE, __OFF(wireguard_subclassification_by_ip), NULL },

diff --git a/src/lib/ndpi_utils.c b/src/lib/ndpi_utils.c
@@ -2097,6 +2097,9 @@ const char* ndpi_risk2str(ndpi_risk_enum risk) {
   case NDPI_PROBING_ATTEMPT:
     return("Probing Attempt");
 
+  case NDPI_OBFUSCATED_TRAFFIC:
+    return("Obfuscated Traffic");
+
   default:
     ndpi_snprintf(buf, sizeof(buf), "%d", (int)risk);
     return(buf);
@@ -2221,6 +2224,8 @@ const char* ndpi_risk2code(ndpi_risk_enum risk) {
     return STRINGIFY(NDPI_BINARY_DATA_TRANSFER);
   case NDPI_PROBING_ATTEMPT:
     return STRINGIFY(NDPI_PROBING_ATTEMPT);
+  case NDPI_OBFUSCATED_TRAFFIC:
+    return STRINGIFY(NDPI_OBFUSCATED_TRAFFIC);
 
   default:
     return("Unknown risk");
@@ -2342,6 +2347,8 @@ ndpi_risk_enum ndpi_code2risk(const char* risk) {
     return(NDPI_BINARY_DATA_TRANSFER);
   else if(strcmp(STRINGIFY(NDPI_PROBING_ATTEMPT), risk) == 0)
     return(NDPI_PROBING_ATTEMPT);
+  else if(strcmp(STRINGIFY(NDPI_OBFUSCATED_TRAFFIC), risk) == 0)
+    return(NDPI_OBFUSCATED_TRAFFIC);
   else
     return(NDPI_MAX_RISK);
 }