From e99db35508cbe6b5fe4a16b8d50fcfe8fdfdedcc Mon Sep 17 00:00:00 2001
From: Volodymyr Huti <volodymyr.huti@gmail.com>
Date: Thu, 14 Jul 2022 16:44:39 +0300
Subject: [PATCH] QPPB DEMO v1 (Kernel)

---
 include/net/dst.h              |  1 +
 include/net/ip_fib.h           |  5 ++++-
 include/net/route.h            |  1 +
 include/uapi/linux/rtnetlink.h |  2 ++
 net/ipv4/Makefile              |  5 +++++
 net/ipv4/fib_frontend.c        |  6 +++++-
 net/ipv4/fib_lookup.h          |  1 +
 net/ipv4/fib_semantics.c       |  4 ++++
 net/ipv4/fib_trie.c            |  6 ++++--
 net/ipv4/ip_forward.c          | 21 +++++++++++++++++++++
 net/ipv4/ip_output.c           | 10 ++++++++++
 net/ipv4/route.c               |  4 +++-
 12 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 6aa252c3fc55..013bdb418ba3 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -77,6 +77,7 @@ struct dst_entry {
 #ifndef CONFIG_64BIT
 	atomic_t		__refcnt;	/* 32-bit offset 64 */
 #endif
+	int			edscp;		/* egress DSCP copied from the FIB result */
 	netdevice_tracker	dev_tracker;
 };
 
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a378eff827c7..ff5a6201fbd6 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -25,7 +25,8 @@
 
 struct fib_config {
 	u8			fc_dst_len;
-	dscp_t			fc_dscp;
+	dscp_t			fc_idscp;
+	dscp_t			fc_edscp;
 	u8			fc_protocol;
 	u8			fc_scope;
 	u8			fc_type;
@@ -172,6 +173,7 @@ struct fib_result {
 	unsigned char		type;
 	unsigned char		scope;
 	u32			tclassid;
+	dscp_t			edscp;
 	struct fib_nh_common	*nhc;
 	struct fib_info		*fi;
 	struct fib_table	*table;
@@ -213,6 +215,7 @@ struct fib_rt_info {
 	__be32			dst;
 	int			dst_len;
 	dscp_t			dscp;
+	dscp_t			edscp;
 	u8			type;
 	u8			offload:1,
 				trap:1,
diff --git a/include/net/route.h b/include/net/route.h
index 991a3985712d..71e069675810 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -70,6 +70,7 @@ struct rtable {
 	__u8			rt_uses_gateway;
 
 	int			rt_iif;
+	int			rt_edscp;
 
 	u8			rt_gw_family;
 	/* Info on neighbour */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 83849a37db5b..d39978d583f5 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -235,6 +235,7 @@ struct rtattr {
  ****/
 
 struct rtmsg {
+	/* unsigned char		rtm_dscp; */
 	unsigned char		rtm_family;
 	unsigned char		rtm_dst_len;
 	unsigned char		rtm_src_len;
@@ -336,6 +337,7 @@ enum rt_scope_t {
 #define RTM_F_FIB_MATCH	        0x2000	/* return full fib lookup match */
 #define RTM_F_OFFLOAD		0x4000	/* route is offloaded */
 #define RTM_F_TRAP		0x8000	/* route is trapping packets */
+#define RTM_F_DSCP		0x10000	/* route modifies the dscp mark */
 #define RTM_F_OFFLOAD_FAILED	0x20000000 /* route offload failed, this value
 					    * is chosen to avoid conflicts with
 					    * other flags defined in
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index bbdd9c44f14e..c7e247af3f79 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -16,6 +16,11 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
 	     metrics.o netlink.o nexthop.o udp_tunnel_stub.o
 
+# ccflags-y += -O0 
+ccflags-y += -fno-default-inline -fno-inline -fno-inline-small-functions \
+		-fno-indirect-inlining -fno-inline-functions-called-once
+ccflags-y += -O1 -g3 -ggdb3
+
 obj-$(CONFIG_BPFILTER) += bpfilter/
 
 obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f361d3d56be2..d86bcbf99650 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -745,7 +745,11 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 		err = -EINVAL;
 		goto errout;
 	}
-	cfg->fc_dscp = inet_dsfield_to_dscp(rtm->rtm_tos);
+
+	if (rtm->rtm_flags & RTM_F_DSCP)	/* rtm_tos carries the DSCP to set */
+		cfg->fc_edscp = inet_dsfield_to_dscp(rtm->rtm_tos);
+	else					/* rtm_tos is the DSCP to match */
+		cfg->fc_idscp = inet_dsfield_to_dscp(rtm->rtm_tos);
 
 	cfg->fc_dst_len = rtm->rtm_dst_len;
 	cfg->fc_table = rtm->rtm_table;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index f9b9e26c32c1..59bb6b9764b2 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -12,6 +12,7 @@ struct fib_alias {
 	struct hlist_node	fa_list;
 	struct fib_info		*fa_info;
 	dscp_t			fa_dscp;
+	dscp_t			fa_edscp;
 	u8			fa_type;
 	u8			fa_state;
 	u8			fa_slen;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a5439a8414d4..308c06eb770a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -525,6 +525,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
 	fri.dst = key;
 	fri.dst_len = dst_len;
 	fri.dscp = fa->fa_dscp;
+	fri.edscp = fa->fa_edscp;
 	fri.type = fa->fa_type;
 	fri.offload = READ_ONCE(fa->offload);
 	fri.trap = READ_ONCE(fa->trap);
@@ -1783,6 +1784,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 	rtm->rtm_dst_len = fri->dst_len;
 	rtm->rtm_src_len = 0;
 	rtm->rtm_tos = inet_dscp_to_dsfield(fri->dscp);
+	if (fi->fib_flags & RTM_F_DSCP)	/* report the DSCP the route sets instead */
+		rtm->rtm_tos = inet_dscp_to_dsfield(fri->edscp);
+
 	if (tb_id < 256)
 		rtm->rtm_table = tb_id;
 	else
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2734c3af7e24..1b8aa417d7e1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1236,7 +1236,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
 		goto err;
 	}
 
-	dscp = cfg->fc_dscp;
+	dscp = cfg->fc_idscp;
 	l = fib_find_node(t, &tp, key);
 	fa = l ? fib_find_alias(&l->leaf, slen, dscp, fi->fib_priority,
 				tb->tb_id, false) : NULL;
@@ -1362,6 +1362,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
 
 	new_fa->fa_info = fi;
 	new_fa->fa_dscp = dscp;
+	new_fa->fa_edscp = cfg->fc_edscp;
 	new_fa->fa_type = cfg->fc_type;
 	new_fa->fa_state = 0;
 	new_fa->fa_slen = slen;
@@ -1621,6 +1622,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 			res->nh_sel = nhsel;
 			res->nhc = nhc;
 			res->type = fa->fa_type;
+			res->edscp = fa->fa_edscp;
 			res->scope = fi->fib_scope;
 			res->fi = fi;
 			res->table = tb;
@@ -1723,7 +1725,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
 	if (!l)
 		return -ESRCH;
 
-	dscp = cfg->fc_dscp;
+	dscp = cfg->fc_idscp;
 	fa = fib_find_alias(&l->leaf, slen, dscp, 0, tb->tb_id, false);
 	if (!fa)
 		return -ESRCH;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e3aa436a1bdf..3fbaeee43dcb 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -157,6 +157,27 @@ int ip_forward(struct sk_buff *skb)
 	    !skb_sec_path(skb))
 		ip_rt_send_redirect(skb);
 
+#if 0
+	/* BGP-policy source lookup: look up the route cache entry for the
+	 * packet sender (the `src` address); the DSCP mark is then applied
+	 * from the `dst` cache entry. This doesn't fully work yet; the lookup
+	 * algorithm details still need research.
+	 */
+	struct rtable *irt;
+	irt = ip_route_output(net, iph->saddr, iph->daddr, 0, skb->skb_iif); /* fails */
+	irt = ip_route_output(net, iph->saddr, 0, 0, 0); /* doesn't fail, wrong entry returned */
+	if (!IS_ERR(irt)) {
+		printk("QQQ irt [src=%pI4|dst=%pI4|rt_dscp=%d|rrt_dscp=%d|gw=%pI4]\n",
+			&iph->saddr, &iph->daddr, rt->dst.edscp, irt->dst.edscp,
+			&rt->rt_gw4);
+	} else {
+		if (gdebug) debug();
+		printk("QQQ src_lookup failed %ld", PTR_ERR(irt));
+	}
+#endif
+	/* Remark only when the route carries a DSCP; keep the packet's ECN bits. */
+	if (rt->dst.edscp)
+		ipv4_change_dsfield(iph, INET_ECN_MASK, rt->dst.edscp);
 	if (net->ipv4.sysctl_ip_fwd_update_priority)
 		skb->priority = rt_tos2priority(iph->tos);
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 00b4bf26fd93..35a0d951d1bc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -422,6 +422,16 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
 
+#if 0
+	struct iphdr *iph;	/* Our header */
+	struct rtable *rt;	/* Route we use */
+
+	rt = skb_rtable(skb);
+	iph = ip_hdr(skb);
+	if (rt->dst.edscp)	/* rewrite DSCP only; ECN bits are preserved */
+		ipv4_change_dsfield(iph, INET_ECN_MASK, rt->dst.edscp);
+#endif
+
 	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
 
 	skb->dev = dev;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2d16bcc7d346..6e33a58c3a11 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1857,6 +1857,8 @@ static int __mkroute_input(struct sk_buff *skb,
 		else
 			rth = rcu_dereference(nhc->nhc_rth_input);
 		if (rt_cache_valid(rth)) {
+			rth->dst.edscp = res->edscp;
+			pr_debug("QQQ YAY [%d]\n", rth->dst.edscp);
 			skb_dst_set_noref(skb, &rth->dst);
 			goto out;
 		}
@@ -1871,8 +1873,8 @@ static int __mkroute_input(struct sk_buff *skb,
 
 	rth->rt_is_input = 1;
 	RT_CACHE_STAT_INC(in_slow_tot);
-
 	rth->dst.input = ip_forward;
+	rth->dst.edscp = res->edscp;
 
 	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
 		       do_cache);
-- 
2.25.1

