sch_htb: per-class burst ceiling, 1024-bucket class hash, backlog stats

Adds an optional "burst" rate table to every HTB class.  A per-class timer
averages the class byte rate over HTB_RATEMEASURE intervals and switches the
effective ceiling between the burst and ceil tables when the average crosses
thr_ceil / thr_burst.  The patch also widens the classid hash to 1024
buckets (rate estimation walks HTB_HSIZE / HTB_RATE_CHUNKS buckets per
tick), extrapolates L2T for sizes beyond the rate table, and keeps a
per-class backlog byte count.

--- linux-2.6.22.14/net/sched/sch_htb.c	2008-01-22 13:57:01.000000000 +0200
+++ linux/net/sched/sch_htb.c	2008-01-15 10:57:01.000000000 +0200
@@ -68,11 +68,18 @@
     one less than their parent.
 */
 
-#define HTB_HSIZE 16		/* classid hash size */
-#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_HSIZE sec */
+#define HTB_HSIZE 1024		/* classid hash size */
+#define HTB_RATE_CHUNKS 8	/* rate timer ticks needed to walk the whole hash */
+#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_RATE_CHUNKS sec */
 #define HTB_RATECM 1		/* whether to use rate computer */
 #define HTB_HYSTERESIS 1	/* whether to use mode hysteresis for speedup */
 #define HTB_VER 0x30011		/* major must be matched with number suplied by TC as version */
+#define HTB_RATEMEASURE 16	/* count of rate measurements to keep in memory */
+#define HTB_RATESHIFT 4		/* log2(HTB_RATEMEASURE): sum >> shift == average */
+
+#if (1 << HTB_RATESHIFT) != HTB_RATEMEASURE
+#error "HTB_RATESHIFT must be log2 of HTB_RATEMEASURE"
+#endif
 
 #if HTB_VER >> 16 != TC_HTB_PROTOVER
 #error "Mismatched sch_htb.c and pkt_sch.h"
@@ -142,8 +149,22 @@ struct htb_class {
 
 	/* token bucket parameters */
 	struct qdisc_rate_table *rate;	/* rate table of the class itself */
+	struct qdisc_rate_table *actual_ceil;	/* table in effect: ceil or burst */
 	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
+	struct qdisc_rate_table *burst;	/* burst rate, NULL when not configured */
+
+	unsigned thr_ceil;		/* avg rate above which we switch to ceil */
+	unsigned thr_burst;		/* avg rate below which we switch to burst */
+	unsigned measure_interval;	/* rate measure interval in jiffies */
+	unsigned rates[HTB_RATEMEASURE];	/* rates for previous time intervals */
+	unsigned rate_index;		/* next slot of 'rates' to be written */
+	unsigned long long rate_sum;	/* running sum over 'rates' */
+	unsigned long long prev_bytes;	/* stat bytes at prev timer execution */
+	struct timer_list burst_timer;	/* periodic rate sampler */
+	struct Qdisc *qdisc;		/* needed to get lock */
+
 	long buffer, cbuffer;	/* token bucket depth/rate */
+	long acbuffer, bbuffer;	/* depth in effect / burst depth */
 	psched_tdiff_t mbuffer;	/* max wait time */
 	long tokens, ctokens;	/* current number of tokens */
 	psched_time_t t_c;	/* checkpoint time */
@@ -153,15 +174,14 @@ struct htb_class {
 				/* of un.leaf originals should be done. */
 };
 
-/* TODO: maybe compute rate when size is too large .. or drop ? */
+/* Sizes past the 256-slot table are charged as whole-table multiples
+ * (data[255] each) plus the table entry for the remaining slots. */
 static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
 			   int size)
 {
 	int slot = size >> rate->rate.cell_log;
-	if (slot > 255) {
-		cl->xstats.giants++;
-		slot = 255;
-	}
+	if (slot > 255)
+		return (rate->data[255]*(slot >> 8) + rate->data[slot & 0xFF]);
 	return rate->data[slot];
 }
 
@@ -209,12 +229,15 @@ struct htb_sched {
 /* compute hash of size HTB_HSIZE for given handle */
 static inline int htb_hash(u32 h)
 {
-#if HTB_HSIZE != 16
+#if HTB_HSIZE == 1024
+	return h & (HTB_HSIZE - 1);
+#elif HTB_HSIZE != 16
 #error "Declare new hash for your HTB_HSIZE"
-#endif
+#else
 	h ^= h >> 8;		/* stolen from cbq_hash */
 	h ^= h >> 4;
 	return h & 0xf;
+#endif
 }
 
 /* find class in global hash table using given handle */
@@ -505,7 +528,7 @@ static void htb_deactivate_prios(struct
 #if HTB_HYSTERESIS
 static inline long htb_lowater(const struct htb_class *cl)
 {
-	return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
+	return cl->cmode != HTB_CANT_SEND ? -cl->acbuffer : 0;
 }
 static inline long htb_hiwater(const struct htb_class *cl)
 {
@@ -636,6 +659,7 @@ static int htb_enqueue(struct sk_buff *s
 	} else {
 		cl->bstats.packets++;
 		cl->bstats.bytes += skb->len;
+		cl->qstats.backlog += skb->len;
 		htb_activate(q, cl);
 	}
 
@@ -669,8 +693,10 @@ static int htb_requeue(struct sk_buff *s
 		sch->qstats.drops++;
 		cl->qstats.drops++;
 		return NET_XMIT_DROP;
-	} else
+	} else {
+		cl->qstats.backlog += skb->len;
 		htb_activate(q, cl);
+	}
 
 	sch->q.qlen++;
 	sch->qstats.requeues++;
@@ -685,7 +711,7 @@ static void htb_rate_timer(unsigned long
 	struct htb_sched *q = qdisc_priv(sch);
 	struct hlist_node *p;
 	struct htb_class *cl;
-
+	unsigned i;
 
 	/* lock queue so that we can muck with it */
 	spin_lock_bh(&sch->dev->queue_lock);
@@ -693,6 +719,7 @@ static void htb_rate_timer(unsigned long
 	q->rttim.expires = jiffies + HZ;
 	add_timer(&q->rttim);
 
+	for (i = 0; i < (HTB_HSIZE / HTB_RATE_CHUNKS); ++i) {
 	/* scan and recompute one bucket at time */
 	if (++q->recmp_bucket >= HTB_HSIZE)
 		q->recmp_bucket = 0;
@@ -701,10 +728,68 @@ static void htb_rate_timer(unsigned long
 		RT_GEN(cl->sum_bytes, cl->rate_bytes);
 		RT_GEN(cl->sum_packets, cl->rate_packets);
 	}
+	}
+
 	spin_unlock_bh(&sch->dev->queue_lock);
 }
 #endif
 
+/*
+ * htb_burst_timer - per-class sampler that picks the effective ceiling
+ *
+ * Fires every cl->measure_interval jiffies: stores the byte rate of the
+ * elapsed interval, maintains a sliding sum of HTB_RATEMEASURE samples and
+ * flips actual_ceil/acbuffer between the burst and ceil settings when the
+ * average crosses thr_ceil (upwards) or thr_burst (downwards).
+ */
+static void htb_burst_timer(unsigned long arg)
+{
+	struct htb_class *cl = (struct htb_class *)arg;
+	unsigned delta, current_rate, avg_rate;
+
+	spin_lock_bh(&cl->qdisc->dev->queue_lock);
+
+	/* burst table was dropped by a class change: stop sampling */
+	if (!cl->burst)
+		goto out;
+
+	delta = cl->bstats.bytes - cl->prev_bytes;
+	cl->prev_bytes = cl->bstats.bytes;
+	if (delta <= UINT_MAX / HZ) {
+		current_rate = delta * HZ / cl->measure_interval;
+	} else {
+		/* delta * HZ would wrap: divide first, saturate if needed */
+		current_rate = delta / cl->measure_interval;
+		current_rate = current_rate <= UINT_MAX / HZ ?
+			       current_rate * HZ : UINT_MAX;
+	}
+
+	cl->rates[cl->rate_index] = current_rate;
+	cl->rate_sum += current_rate;
+	if (++cl->rate_index == HTB_RATEMEASURE)
+		cl->rate_index = 0;
+	avg_rate = cl->rate_sum >> HTB_RATESHIFT;
+	/* retire the oldest sample; its slot is overwritten on the next run */
+	cl->rate_sum -= cl->rates[cl->rate_index];
+
+	if (cl->actual_ceil == cl->burst) {
+		if (avg_rate > cl->thr_ceil) {
+			/* switch to ceil */
+			cl->actual_ceil = cl->ceil;
+			cl->acbuffer = cl->cbuffer;
+		}
+	} else if (avg_rate < cl->thr_burst) {
+		/* switch to burst */
+		cl->actual_ceil = cl->burst;
+		cl->acbuffer = cl->bbuffer;
+	}
+
+	/* mod_timer: a concurrent class change may already have re-armed us */
+	mod_timer(&cl->burst_timer, jiffies + cl->measure_interval);
+out:
+	spin_unlock_bh(&cl->qdisc->dev->queue_lock);
+}
+
 /**
  * htb_charge_class - charges amount "bytes" to leaf and ancestors
  *
@@ -738,7 +823,7 @@ static void htb_charge_class(struct htb_
 			cl->xstats.borrows++;
 			cl->tokens += diff;	/* we moved t_c; update tokens */
 		}
-		HTB_ACCNT(ctokens, cbuffer, ceil);
+		HTB_ACCNT(ctokens, acbuffer, actual_ceil);
 		cl->t_c = q->now;
 
 		old_mode = cl->cmode;
@@ -944,6 +1029,7 @@ next:
 		if (!cl->un.leaf.q->q.qlen)
 			htb_deactivate(q, cl);
 		htb_charge_class(q, cl, level, skb->len);
+		cl->qstats.backlog -= skb->len;
 	}
 	return skb;
 }
@@ -1157,6 +1243,10 @@ static int htb_dump_class(struct Qdisc *
 	opt.buffer = cl->buffer;
 	opt.ceil = cl->ceil->rate;
 	opt.cbuffer = cl->cbuffer;
+	if (cl->burst) {
+		opt.burst = cl->burst->rate;
+		opt.bbuffer = cl->bbuffer;
+	}
 	opt.quantum = cl->un.leaf.quantum;
 	opt.prio = cl->un.leaf.prio;
 	opt.level = cl->level;
@@ -1176,8 +1266,8 @@ htb_dump_class_stats(struct Qdisc *sch,
 	struct htb_class *cl = (struct htb_class *)arg;
 
 #ifdef HTB_RATECM
-	cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
-	cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
+	cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_RATE_CHUNKS);
+	cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_RATE_CHUNKS);
 #endif
 
 	if (!cl->level && cl->un.leaf.q)
@@ -1280,6 +1370,17 @@ static void htb_destroy_class(struct Qdi
 
 	qdisc_put_rtab(cl->rate);
 	qdisc_put_rtab(cl->ceil);
+	/*
+	 * The timer is set up when the class is created and may still be
+	 * finishing a run after its burst table was removed, so wait for it
+	 * unconditionally and before the table is released.
+	 * NOTE(review): the handler takes dev->queue_lock; confirm no caller
+	 * reaches this point holding it, or del_timer_sync() can deadlock.
+	 */
+	del_timer_sync(&cl->burst_timer);
+	if (cl->burst)
+		qdisc_put_rtab(cl->burst);
+
 	tcf_destroy_chain(cl->filter_list);
 
 	while (!list_empty(&cl->children))
@@ -1381,7 +1482,7 @@ static int htb_change_class(struct Qdisc
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
 	struct rtattr *opt = tca[TCA_OPTIONS - 1];
-	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
+	struct qdisc_rate_table *rtab = NULL, *ctab = NULL, *btab = NULL;
 	struct rtattr *tb[TCA_HTB_RTAB];
 	struct tc_htb_opt *hopt;
 
@@ -1397,6 +1498,9 @@ static int htb_change_class(struct Qdisc
 
 	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]);
 	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]);
+	/* NOTE(review): tb[] has TCA_HTB_RTAB slots; confirm that
+	 * TCA_HTB_BTAB <= TCA_HTB_RTAB so this index stays in bounds. */
+	btab = qdisc_get_rtab(&hopt->burst, tb[TCA_HTB_BTAB - 1]);
 	if (!rtab || !ctab)
 		goto failure;
 
@@ -1460,7 +1564,7 @@ static int htb_change_class(struct Qdisc
 
 		/* set class to be in HTB_CAN_SEND state */
 		cl->tokens = hopt->buffer;
-		cl->ctokens = hopt->cbuffer;
+		cl->ctokens = btab ? hopt->bbuffer : hopt->cbuffer;
 		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
 		cl->t_c = psched_get_time();
 		cl->cmode = HTB_CAN_SEND;
@@ -1469,6 +1573,12 @@ static int htb_change_class(struct Qdisc
 		hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
 		list_add_tail(&cl->sibling,
 			      parent ? &parent->children : &q->root);
+
+		init_timer(&cl->burst_timer);
+		cl->burst_timer.function = htb_burst_timer;
+		cl->burst_timer.data = (unsigned long) cl;
+
+		cl->qdisc = sch;
 	} else
 		sch_tree_lock(sch);
 
@@ -1477,9 +1587,11 @@ static int htb_change_class(struct Qdisc
 	if (!cl->level) {
 		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
 		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
+#if 0
 			printk(KERN_WARNING
 			       "HTB: quantum of class %X is small. Consider r2q change.\n",
 			       cl->classid);
+#endif
 			cl->un.leaf.quantum = 1000;
 		}
 		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
@@ -1498,14 +1610,40 @@ static int htb_change_class(struct Qdisc
 		cl->prio = cl->un.leaf.prio;
 	}
 
+	cl->thr_ceil = hopt->thr_ceil;
+	cl->thr_burst = hopt->thr_burst;
+	cl->measure_interval = hopt->interval * HZ / HTB_RATEMEASURE;
+	if (!cl->measure_interval)
+		cl->measure_interval = 1;	/* the burst timer divides by it */
+	memset(cl->rates, 0, sizeof(cl->rates));
+	cl->rate_index = 0;
+	cl->rate_sum = 0;
+	cl->prev_bytes = cl->bstats.bytes;
+
 	cl->buffer = hopt->buffer;
 	cl->cbuffer = hopt->cbuffer;
+	cl->bbuffer = hopt->bbuffer;
 	if (cl->rate)
 		qdisc_put_rtab(cl->rate);
 	cl->rate = rtab;
 	if (cl->ceil)
 		qdisc_put_rtab(cl->ceil);
 	cl->ceil = ctab;
+	if (cl->burst)
+		qdisc_put_rtab(cl->burst);
+	cl->burst = btab;
+
+	if (btab) {
+		mod_timer(&cl->burst_timer, jiffies + cl->measure_interval);
+		cl->actual_ceil = cl->burst;
+		cl->acbuffer = cl->bbuffer;
+	} else {
+		/* no burst table: make sure the sampler is not left armed */
+		del_timer(&cl->burst_timer);
+		cl->actual_ceil = cl->ceil;
+		cl->acbuffer = cl->cbuffer;
+	}
+
 	sch_tree_unlock(sch);
 
 	*arg = (unsigned long)cl;
@@ -1516,6 +1654,8 @@ failure:
 		qdisc_put_rtab(rtab);
 	if (ctab)
 		qdisc_put_rtab(ctab);
+	if (btab)
+		qdisc_put_rtab(btab);
 	return err;
 }
 