41#include <sys/kernel.h>
44#include <sys/malloc.h>
47#include <sys/sysctl.h>
59 "CAM I/O Scheduler buffers");
61static SYSCTL_NODE(_kern_cam, OID_AUTO, iosched, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
62 "CAM I/O Scheduler parameters");
74#ifdef CAM_IOSCHED_DYNAMIC
76static bool do_dynamic_iosched =
true;
77SYSCTL_BOOL(_kern_cam_iosched, OID_AUTO, dynamic, CTLFLAG_RD | CTLFLAG_TUN,
78 &do_dynamic_iosched, 1,
79 "Enable Dynamic I/O scheduler optimizations.");
101static int alpha_bits = 9;
102SYSCTL_INT(_kern_cam_iosched, OID_AUTO, alpha_bits, CTLFLAG_RW | CTLFLAG_TUN,
104 "Bits in EMA's alpha.");
115#define BUCKET_BASE ((SBT_1S / 50000) + 1)
117static sbintime_t bucket_base = BUCKET_BASE;
118SYSCTL_SBINTIME_USEC(_kern_cam_iosched, OID_AUTO, bucket_base_us, CTLFLAG_RD,
120 "Size of the smallest latency bucket");
126static int bucket_ratio = 200;
127SYSCTL_INT(_kern_cam_iosched, OID_AUTO, bucket_ratio, CTLFLAG_RD,
129 "Latency Bucket Ratio for geometric progression.");
135#define LAT_BUCKETS 20
137static int lat_buckets = LAT_BUCKETS;
138SYSCTL_INT(_kern_cam_iosched, OID_AUTO, buckets, CTLFLAG_RD,
139 &lat_buckets, LAT_BUCKETS,
140 "Total number of latency buckets published");
/*
 * Human-readable names for the limiter types, used by the "limiter"
 * sysctl handler.  Order must match the io_limiters enum (none,
 * queue_depth, iops, bandwidth).
 */
static const char *cam_iosched_limiter_names[] =
    { "none", "queue_depth", "iops", "bandwidth" };
/*
 * Entry-point signatures for the pluggable I/O limiter switch:
 *   l_init_t   -- (re)initialize a limiter's private state for an iop_stats
 *   l_tick_t   -- called each scheduler quantum to replenish quota
 *   l_iop_t    -- admission check+accounting when an I/O is dispatched
 *   l_iodone_t -- accounting when an I/O completes
 * All return 0 on success or an errno-style value.
 */
typedef int l_init_t(struct iop_stats *);
typedef int l_tick_t(struct iop_stats *);
typedef int l_iop_t(struct iop_stats *, struct bio *);
typedef int l_iodone_t(struct iop_stats *, struct bio *);
185static l_iop_t cam_iosched_qd_iop;
186static l_iop_t cam_iosched_qd_caniop;
187static l_iodone_t cam_iosched_qd_iodone;
189static l_init_t cam_iosched_iops_init;
190static l_tick_t cam_iosched_iops_tick;
191static l_iop_t cam_iosched_iops_caniop;
192static l_iop_t cam_iosched_iops_iop;
194static l_init_t cam_iosched_bw_init;
195static l_tick_t cam_iosched_bw_tick;
196static l_iop_t cam_iosched_bw_caniop;
197static l_iop_t cam_iosched_bw_iop;
204 l_iodone_t *l_iodone;
216 .l_caniop = cam_iosched_qd_caniop,
217 .l_iop = cam_iosched_qd_iop,
218 .l_iodone= cam_iosched_qd_iodone,
221 .l_init = cam_iosched_iops_init,
222 .l_tick = cam_iosched_iops_tick,
223 .l_caniop = cam_iosched_iops_caniop,
224 .l_iop = cam_iosched_iops_iop,
228 .l_init = cam_iosched_bw_init,
229 .l_tick = cam_iosched_bw_tick,
230 .l_caniop = cam_iosched_bw_caniop,
231 .l_iop = cam_iosched_bw_iop,
240 struct sysctl_ctx_list sysctl_ctx;
241 struct sysctl_oid *sysctl_tree;
272 uint32_t state_flags;
273#define IOP_RATE_LIMITED 1u
275 uint64_t latencies[LAT_BUCKETS];
286static const char *cam_iosched_control_type_names[] =
287 {
"set_max",
"read_latency" };
293 struct sysctl_ctx_list sysctl_ctx;
294 struct sysctl_oid *sysctl_tree;
296 sbintime_t next_steer;
297 sbintime_t steer_interval;
319#ifdef CAM_IOSCHED_DYNAMIC
321 int current_read_bias;
325 struct bio_queue_head write_queue;
326 struct iop_stats read_stats, write_stats, trim_stats;
327 struct sysctl_ctx_list sysctl_ctx;
328 struct sysctl_oid *sysctl_tree;
331 struct callout ticker;
334 sbintime_t last_time;
335 struct control_loop cl;
342#ifdef CAM_IOSCHED_DYNAMIC
347cam_iosched_limiter_init(
struct iop_stats *ios)
349 int lim = ios->limiter;
352 if (lim < none || lim >= limiter_max)
355 if (limsw[lim].l_init)
356 return limsw[lim].l_init(ios);
362cam_iosched_limiter_tick(
struct iop_stats *ios)
364 int lim = ios->limiter;
367 if (lim < none || lim >= limiter_max)
370 if (limsw[lim].l_tick)
371 return limsw[lim].l_tick(ios);
377cam_iosched_limiter_iop(
struct iop_stats *ios,
struct bio *bp)
379 int lim = ios->limiter;
382 if (lim < none || lim >= limiter_max)
385 if (limsw[lim].l_iop)
386 return limsw[lim].l_iop(ios, bp);
392cam_iosched_limiter_caniop(
struct iop_stats *ios,
struct bio *bp)
394 int lim = ios->limiter;
397 if (lim < none || lim >= limiter_max)
400 if (limsw[lim].l_caniop)
401 return limsw[lim].l_caniop(ios, bp);
407cam_iosched_limiter_iodone(
struct iop_stats *ios,
struct bio *bp)
409 int lim = ios->limiter;
412 if (lim < none || lim >= limiter_max)
415 if (limsw[lim].l_iodone)
416 return limsw[lim].l_iodone(ios, bp);
426cam_iosched_qd_iop(
struct iop_stats *ios,
struct bio *bp)
429 if (ios->current <= 0 || ios->pending < ios->current)
436cam_iosched_qd_caniop(
struct iop_stats *ios,
struct bio *bp)
439 if (ios->current <= 0 || ios->pending < ios->current)
446cam_iosched_qd_iodone(
struct iop_stats *ios,
struct bio *bp)
449 if (ios->current <= 0 || ios->pending != ios->current)
456cam_iosched_iops_init(
struct iop_stats *ios)
459 ios->l_value1 = ios->current / ios->softc->quanta;
460 if (ios->l_value1 <= 0)
468cam_iosched_iops_tick(
struct iop_stats *ios)
476 new_ios = (int)((ios->current * (uint64_t)ios->softc->this_frac) >> 16);
477 if (new_ios < 1 && ios->l_value2 < ios->current) {
487 if ((ios->softc->total_ticks % ios->softc->quanta) == 0) {
488 ios->l_value1 = new_ios;
491 ios->l_value1 += new_ios;
498cam_iosched_iops_caniop(
struct iop_stats *ios,
struct bio *bp)
506 if (ios->current > 0 && ios->l_value1 <= 0)
512cam_iosched_iops_iop(
struct iop_stats *ios,
struct bio *bp)
516 rv = cam_iosched_limiter_caniop(ios, bp);
524cam_iosched_bw_init(
struct iop_stats *ios)
528 ios->l_value1 = ios->current * 1000 / ios->softc->quanta;
534cam_iosched_bw_tick(
struct iop_stats *ios)
549 bw = (int)((ios->current * 1000ull * (uint64_t)ios->softc->this_frac) >> 16);
550 if (ios->l_value1 < bw * 4)
557cam_iosched_bw_caniop(
struct iop_stats *ios,
struct bio *bp)
576 if (ios->current > 0 && ios->l_value1 <= 0)
583cam_iosched_bw_iop(
struct iop_stats *ios,
struct bio *bp)
587 rv = cam_iosched_limiter_caniop(ios, bp);
589 ios->l_value1 -= bp->bio_length;
594static void cam_iosched_cl_maybe_steer(
struct control_loop *clp);
597cam_iosched_ticker(
void *arg)
600 sbintime_t now, delta;
603 callout_reset(&isc->ticker, hz / isc->quanta, cam_iosched_ticker, isc);
606 delta = now - isc->last_time;
607 isc->this_frac = (uint32_t)delta >> 16;
608 isc->last_time = now;
610 cam_iosched_cl_maybe_steer(&isc->cl);
612 cam_iosched_limiter_tick(&isc->read_stats);
613 cam_iosched_limiter_tick(&isc->write_stats);
614 cam_iosched_limiter_tick(&isc->trim_stats);
639 pending = isc->read_stats.pending + isc->write_stats.pending ;
640 pending += !!(isc->read_stats.state_flags & IOP_RATE_LIMITED) * isc->read_stats.queued +
641 !!(isc->write_stats.state_flags & IOP_RATE_LIMITED) * isc->write_stats.queued
644 pending /= isc->periph->path->device->ccbq.total_openings;
646 isc->load = (pending + (isc->load << 13) - isc->load) >> 13;
655 clp->next_steer = sbinuptime();
657 clp->steer_interval = SBT_1S * 5;
658 clp->lolat = 5 * SBT_1MS;
659 clp->hilat = 15 * SBT_1MS;
665cam_iosched_cl_maybe_steer(
struct control_loop *clp)
672 now = isc->last_time;
673 if (now < clp->next_steer)
676 clp->next_steer = now + clp->steer_interval;
679 if (isc->write_stats.current != isc->write_stats.max)
680 printf(
"Steering write from %d kBps to %d kBps\n",
681 isc->write_stats.current, isc->write_stats.max);
682 isc->read_stats.current = isc->read_stats.max;
683 isc->write_stats.current = isc->write_stats.max;
684 isc->trim_stats.current = isc->trim_stats.max;
687 old = isc->write_stats.current;
688 lat = isc->read_stats.ema;
715 if (lat < clp->lolat || isc->read_stats.total - clp->last_count < 10)
716 isc->write_stats.current = isc->write_stats.current *
717 (100 + clp->alpha) / 100;
718 else if (lat > clp->hilat)
719 isc->write_stats.current = isc->write_stats.current *
720 (100 - clp->alpha) / 100;
721 clp->last_count = isc->read_stats.total;
727 if (isc->write_stats.current < isc->write_stats.min)
728 isc->write_stats.current = isc->write_stats.min;
729 if (isc->write_stats.current > isc->write_stats.max)
730 isc->write_stats.current = isc->write_stats.max;
732 printf(
"Steering write from %d kBps to %d kBps due to latency of %jdus\n",
733 old, isc->write_stats.current,
734 (uintmax_t)((uint64_t)1000000 * (uint32_t)lat) >> 32);
/* A trim request is in flight to the hardware. */
#define CAM_IOSCHED_FLAG_TRIM_ACTIVE	(1ul << 0)
/* The periodic ticker callout is armed. */
#define CAM_IOSCHED_FLAG_CALLOUT_ACTIVE	(1ul << 1)
/* Upper 16 bits are reserved for periph-driver work flags. */
#define CAM_IOSCHED_FLAG_WORK_FLAGS	((0xffffu) << 16)
753#ifdef CAM_IOSCHED_DYNAMIC
756 sbintime_t sim_latency,
int cmd,
size_t size);
768#ifdef CAM_IOSCHED_DYNAMIC
769 if (do_dynamic_iosched) {
770 struct bio *rbp = bioq_first(&isc->
bio_queue);
771 struct bio *wbp = bioq_first(&isc->write_queue);
772 bool can_write = wbp != NULL &&
773 cam_iosched_limiter_caniop(&isc->write_stats, wbp) == 0;
774 bool can_read = rbp != NULL &&
775 cam_iosched_limiter_caniop(&isc->read_stats, rbp) == 0;
777 printf(
"can write %d: pending_writes %d max_writes %d\n", can_write, isc->write_stats.pending, isc->write_stats.max);
778 printf(
"can read %d: read_stats.pending %d max_reads %d\n", can_read, isc->read_stats.pending, isc->read_stats.max);
779 printf(
"Queued reads %d writes %d\n", isc->read_stats.queued, isc->write_stats.queued);
781 return can_read || can_write;
784 return bioq_first(&isc->
bio_queue) != NULL;
793#ifdef CAM_IOSCHED_DYNAMIC
794 if (do_dynamic_iosched) {
799 if (bp == NULL || cam_iosched_limiter_caniop(&isc->trim_stats, bp) != 0)
/*
 * Should this scheduler sort its bio queue?  A per-instance override
 * (sort_io_queue >= 0) wins; otherwise fall back to the global
 * cam_sort_io_queues tunable.
 */
#define cam_iosched_sort_queue(isc)	((isc)->sort_io_queue >= 0 ?	\
    (isc)->sort_io_queue : cam_sort_io_queues)
829#ifdef CAM_IOSCHED_DYNAMIC
841#ifdef CAM_IOSCHED_DYNAMIC
848 ios->max = ios->current = 300000;
858 cam_iosched_limiter_init(ios);
862cam_iosched_limiter_sysctl(SYSCTL_HANDLER_ARGS)
865 struct iop_stats *ios;
872 value = ios->limiter;
873 if (value < none || value >= limiter_max)
876 p = cam_iosched_limiter_names[value];
878 strlcpy(buf, p,
sizeof(buf));
879 error = sysctl_handle_string(oidp, buf,
sizeof(buf), req);
880 if (error != 0 || req->newptr == NULL)
885 for (i = none; i < limiter_max; i++) {
886 if (strcmp(buf, cam_iosched_limiter_names[i]) != 0)
889 error = cam_iosched_limiter_init(ios);
891 ios->limiter = value;
896 callout_reset(&isc->ticker, hz / isc->quanta, cam_iosched_ticker, isc);
908cam_iosched_control_type_sysctl(SYSCTL_HANDLER_ARGS)
911 struct control_loop *clp;
919 if (value < none || value >= cl_max)
922 p = cam_iosched_control_type_names[value];
924 strlcpy(buf, p,
sizeof(buf));
925 error = sysctl_handle_string(oidp, buf,
sizeof(buf), req);
926 if (error != 0 || req->newptr == NULL)
929 for (i = set_max; i < cl_max; i++) {
930 if (strcmp(buf, cam_iosched_control_type_names[i]) != 0)
942cam_iosched_sbintime_sysctl(SYSCTL_HANDLER_ARGS)
949 value = *(sbintime_t *)arg1;
950 us = (uint64_t)value / SBT_1US;
951 snprintf(buf,
sizeof(buf),
"%ju", (intmax_t)us);
952 error = sysctl_handle_string(oidp, buf,
sizeof(buf), req);
953 if (error != 0 || req->newptr == NULL)
955 us = strtoul(buf, NULL, 10);
958 *(sbintime_t *)arg1 = us * SBT_1US;
963cam_iosched_sysctl_latencies(SYSCTL_HANDLER_ARGS)
970 sbuf_new_for_sysctl(&sb, NULL, LAT_BUCKETS * 16, req);
972 for (i = 0; i < LAT_BUCKETS - 1; i++)
973 sbuf_printf(&sb,
"%jd,", (intmax_t)latencies[i]);
974 sbuf_printf(&sb,
"%jd", (intmax_t)latencies[LAT_BUCKETS - 1]);
975 error = sbuf_finish(&sb);
982cam_iosched_quanta_sysctl(SYSCTL_HANDLER_ARGS)
987 quanta = (
unsigned *)arg1;
990 error = sysctl_handle_int(oidp, (
int *)&value, 0, req);
991 if ((error != 0) || (req->newptr == NULL))
994 if (value < 1 || value > hz)
1003cam_iosched_iop_stats_sysctl_init(
struct cam_iosched_softc *isc,
struct iop_stats *ios,
char *name)
1005 struct sysctl_oid_list *n;
1006 struct sysctl_ctx_list *ctx;
1008 ios->sysctl_tree = SYSCTL_ADD_NODE(&isc->sysctl_ctx,
1009 SYSCTL_CHILDREN(isc->sysctl_tree), OID_AUTO, name,
1010 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, name);
1011 n = SYSCTL_CHILDREN(ios->sysctl_tree);
1012 ctx = &ios->sysctl_ctx;
1014 SYSCTL_ADD_UQUAD(ctx, n,
1015 OID_AUTO,
"ema", CTLFLAG_RD,
1017 "Fast Exponentially Weighted Moving Average");
1018 SYSCTL_ADD_UQUAD(ctx, n,
1019 OID_AUTO,
"emvar", CTLFLAG_RD,
1021 "Fast Exponentially Weighted Moving Variance");
1023 SYSCTL_ADD_INT(ctx, n,
1024 OID_AUTO,
"pending", CTLFLAG_RD,
1026 "Instantaneous # of pending transactions");
1027 SYSCTL_ADD_INT(ctx, n,
1028 OID_AUTO,
"count", CTLFLAG_RD,
1030 "# of transactions submitted to hardware");
1031 SYSCTL_ADD_INT(ctx, n,
1032 OID_AUTO,
"queued", CTLFLAG_RD,
1034 "# of transactions in the queue");
1035 SYSCTL_ADD_INT(ctx, n,
1036 OID_AUTO,
"in", CTLFLAG_RD,
1038 "# of transactions queued to driver");
1039 SYSCTL_ADD_INT(ctx, n,
1040 OID_AUTO,
"out", CTLFLAG_RD,
1042 "# of transactions completed (including with error)");
1043 SYSCTL_ADD_INT(ctx, n,
1044 OID_AUTO,
"errs", CTLFLAG_RD,
1046 "# of transactions completed with an error");
1048 SYSCTL_ADD_PROC(ctx, n,
1049 OID_AUTO,
"limiter",
1050 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
1051 ios, 0, cam_iosched_limiter_sysctl,
"A",
1052 "Current limiting type.");
1053 SYSCTL_ADD_INT(ctx, n,
1054 OID_AUTO,
"min", CTLFLAG_RW,
1057 SYSCTL_ADD_INT(ctx, n,
1058 OID_AUTO,
"max", CTLFLAG_RW,
1061 SYSCTL_ADD_INT(ctx, n,
1062 OID_AUTO,
"current", CTLFLAG_RW,
1064 "current resource");
1066 SYSCTL_ADD_PROC(ctx, n,
1067 OID_AUTO,
"latencies",
1068 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
1070 cam_iosched_sysctl_latencies,
"A",
1071 "Array of power of 2 latency from 1ms to 1.024s");
1075cam_iosched_iop_stats_fini(
struct iop_stats *ios)
1077 if (ios->sysctl_tree)
1078 if (sysctl_ctx_free(&ios->sysctl_ctx) != 0)
1079 printf(
"can't remove iosched sysctl stats context\n");
1085 struct sysctl_oid_list *n;
1086 struct sysctl_ctx_list *ctx;
1087 struct control_loop *clp;
1090 clp->sysctl_tree = SYSCTL_ADD_NODE(&isc->sysctl_ctx,
1091 SYSCTL_CHILDREN(isc->sysctl_tree), OID_AUTO,
"control",
1092 CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"Control loop info");
1093 n = SYSCTL_CHILDREN(clp->sysctl_tree);
1094 ctx = &clp->sysctl_ctx;
1096 SYSCTL_ADD_PROC(ctx, n,
1098 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
1099 clp, 0, cam_iosched_control_type_sysctl,
"A",
1100 "Control loop algorithm");
1101 SYSCTL_ADD_PROC(ctx, n,
1102 OID_AUTO,
"steer_interval",
1103 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
1104 &clp->steer_interval, 0, cam_iosched_sbintime_sysctl,
"A",
1105 "How often to steer (in us)");
1106 SYSCTL_ADD_PROC(ctx, n,
1108 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
1109 &clp->lolat, 0, cam_iosched_sbintime_sysctl,
"A",
1110 "Low water mark for Latency (in us)");
1111 SYSCTL_ADD_PROC(ctx, n,
1113 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
1114 &clp->hilat, 0, cam_iosched_sbintime_sysctl,
"A",
1115 "Hi water mark for Latency (in us)");
1116 SYSCTL_ADD_INT(ctx, n,
1117 OID_AUTO,
"alpha", CTLFLAG_RW,
1119 "Alpha for PLL (x100) aka gain");
1123cam_iosched_cl_sysctl_fini(
struct control_loop *clp)
1125 if (clp->sysctl_tree)
1126 if (sysctl_ctx_free(&clp->sysctl_ctx) != 0)
1127 printf(
"can't remove iosched sysctl control loop context\n");
1139 *iscp = malloc(
sizeof(**iscp), M_CAMSCHED, M_NOWAIT | M_ZERO);
1142#ifdef CAM_IOSCHED_DYNAMIC
1144 printf(
"CAM IOSCHEDULER Allocating entry at %p\n", *iscp);
1146 (*iscp)->sort_io_queue = -1;
1147 bioq_init(&(*iscp)->bio_queue);
1148 bioq_init(&(*iscp)->trim_queue);
1149#ifdef CAM_IOSCHED_DYNAMIC
1150 if (do_dynamic_iosched) {
1151 bioq_init(&(*iscp)->write_queue);
1152 (*iscp)->read_bias = 100;
1153 (*iscp)->current_read_bias = 100;
1154 (*iscp)->quanta = min(hz, 200);
1155 cam_iosched_iop_stats_init(*iscp, &(*iscp)->read_stats);
1156 cam_iosched_iop_stats_init(*iscp, &(*iscp)->write_stats);
1157 cam_iosched_iop_stats_init(*iscp, &(*iscp)->trim_stats);
1158 (*iscp)->trim_stats.max = 1;
1159 (*iscp)->last_time = sbinuptime();
1161 (*iscp)->periph = periph;
1162 cam_iosched_cl_init(&(*iscp)->cl, *iscp);
1163 callout_reset(&(*iscp)->ticker, hz / (*iscp)->quanta, cam_iosched_ticker, *iscp);
1180#ifdef CAM_IOSCHED_DYNAMIC
1181 cam_iosched_iop_stats_fini(&isc->read_stats);
1182 cam_iosched_iop_stats_fini(&isc->write_stats);
1183 cam_iosched_iop_stats_fini(&isc->trim_stats);
1184 cam_iosched_cl_sysctl_fini(&isc->cl);
1185 if (isc->sysctl_tree)
1186 if (sysctl_ctx_free(&isc->sysctl_ctx) != 0)
1187 printf(
"can't remove iosched sysctl stats context\n");
1189 callout_drain(&isc->ticker);
1190 isc->
flags &= ~ CAM_IOSCHED_FLAG_CALLOUT_ACTIVE;
1193 free(isc, M_CAMSCHED);
1202 struct sysctl_ctx_list *ctx,
struct sysctl_oid *node)
1204 struct sysctl_oid_list *n;
1206 n = SYSCTL_CHILDREN(node);
1207 SYSCTL_ADD_INT(ctx, n,
1208 OID_AUTO,
"sort_io_queue", CTLFLAG_RW | CTLFLAG_MPSAFE,
1210 "Sort IO queue to try and optimise disk access patterns");
1211 SYSCTL_ADD_INT(ctx, n,
1212 OID_AUTO,
"trim_goal", CTLFLAG_RW,
1214 "Number of trims to try to accumulate before sending to hardware");
1215 SYSCTL_ADD_INT(ctx, n,
1216 OID_AUTO,
"trim_ticks", CTLFLAG_RW,
1218 "IO Schedul qaunta to hold back trims for when accumulating");
1220#ifdef CAM_IOSCHED_DYNAMIC
1221 if (!do_dynamic_iosched)
1224 isc->sysctl_tree = SYSCTL_ADD_NODE(&isc->sysctl_ctx,
1225 SYSCTL_CHILDREN(node), OID_AUTO,
"iosched",
1226 CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"I/O scheduler statistics");
1227 n = SYSCTL_CHILDREN(isc->sysctl_tree);
1228 ctx = &isc->sysctl_ctx;
1230 cam_iosched_iop_stats_sysctl_init(isc, &isc->read_stats,
"read");
1231 cam_iosched_iop_stats_sysctl_init(isc, &isc->write_stats,
"write");
1232 cam_iosched_iop_stats_sysctl_init(isc, &isc->trim_stats,
"trim");
1233 cam_iosched_cl_sysctl_init(isc);
1235 SYSCTL_ADD_INT(ctx, n,
1236 OID_AUTO,
"read_bias", CTLFLAG_RW,
1237 &isc->read_bias, 100,
1238 "How biased towards read should we be independent of limits");
1240 SYSCTL_ADD_PROC(ctx, n,
1241 OID_AUTO,
"quanta", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
1242 &isc->quanta, 0, cam_iosched_quanta_sysctl,
"I",
1243 "How many quanta per second do we slice the I/O up into");
1245 SYSCTL_ADD_INT(ctx, n,
1246 OID_AUTO,
"total_ticks", CTLFLAG_RD,
1247 &isc->total_ticks, 0,
1248 "Total number of ticks we've done");
1250 SYSCTL_ADD_INT(ctx, n,
1251 OID_AUTO,
"load", CTLFLAG_RD,
1253 "scaled load average / 100");
1255 SYSCTL_ADD_U64(ctx, n,
1256 OID_AUTO,
"latency_trigger", CTLFLAG_RW,
1258 "Latency treshold to trigger callbacks");
1266#ifdef CAM_IOSCHED_DYNAMIC
1317#ifdef CAM_IOSCHED_DYNAMIC
1318 if (do_dynamic_iosched)
1319 bioq_flush(&isc->write_queue, stp, err);
1323#ifdef CAM_IOSCHED_DYNAMIC
1338 bp = bioq_first(&isc->write_queue);
1341 printf(
"No writes present in write_queue\n");
1349 if (bioq_first(&isc->
bio_queue) && isc->current_read_bias) {
1352 "Reads present and current_read_bias is %d queued "
1353 "writes %d queued reads %d\n",
1354 isc->current_read_bias, isc->write_stats.queued,
1355 isc->read_stats.queued);
1356 isc->current_read_bias--;
1364 if (cam_iosched_limiter_iop(&isc->write_stats, bp) != 0) {
1366 printf(
"Can't write because limiter says no.\n");
1367 isc->write_stats.state_flags |= IOP_RATE_LIMITED;
1375 isc->current_read_bias = isc->read_bias;
1376 bioq_remove(&isc->write_queue, bp);
1377 if (bp->bio_cmd == BIO_WRITE) {
1378 isc->write_stats.queued--;
1379 isc->write_stats.total++;
1380 isc->write_stats.pending++;
1383 printf(
"HWQ : %p %#x\n", bp, bp->bio_cmd);
1384 isc->write_stats.state_flags &= ~IOP_RATE_LIMITED;
1399#ifdef CAM_IOSCHED_DYNAMIC
1400 isc->trim_stats.queued++;
1401 isc->trim_stats.total--;
1402 isc->trim_stats.pending--;
1424#ifdef CAM_IOSCHED_DYNAMIC
1425 isc->trim_stats.queued--;
1426 isc->trim_stats.total++;
1427 isc->trim_stats.pending++;
1442#ifdef CAM_IOSCHED_DYNAMIC
1448#ifdef CAM_IOSCHED_DYNAMIC
1458 if (do_dynamic_iosched) {
1459 if (bioq_first(&isc->
bio_queue) && isc->current_read_bias) {
1461 printf(
"Reads present and current_read_bias is %d"
1462 " queued trims %d queued reads %d\n",
1463 isc->current_read_bias, isc->trim_stats.queued,
1464 isc->read_stats.queued);
1465 isc->current_read_bias--;
1472 isc->current_read_bias = isc->read_bias;
1485 if (cam_iosched_limiter_iop(&isc->trim_stats, bp) != 0) {
1487 printf(
"Can't trim because limiter says no.\n");
1488 isc->trim_stats.state_flags |= IOP_RATE_LIMITED;
1491 isc->current_read_bias = isc->read_bias;
1492 isc->trim_stats.state_flags &= ~IOP_RATE_LIMITED;
1519#ifdef CAM_IOSCHED_DYNAMIC
1525 if (do_dynamic_iosched) {
1526 if ((bp = cam_iosched_get_write(isc)) != NULL)
1534 if ((bp = bioq_first(&isc->
bio_queue)) == NULL)
1537#ifdef CAM_IOSCHED_DYNAMIC
1542 if (do_dynamic_iosched) {
1543 if (bp->bio_cmd == BIO_READ &&
1544 cam_iosched_limiter_iop(&isc->read_stats, bp) != 0) {
1545 isc->read_stats.state_flags |= IOP_RATE_LIMITED;
1549 isc->read_stats.state_flags &= ~IOP_RATE_LIMITED;
1552#ifdef CAM_IOSCHED_DYNAMIC
1553 if (do_dynamic_iosched) {
1554 if (bp->bio_cmd == BIO_READ) {
1555 isc->read_stats.queued--;
1556 isc->read_stats.total++;
1557 isc->read_stats.pending++;
1559 printf(
"Found bio_cmd = %#x\n", bp->bio_cmd);
1562 printf(
"HWQ : %p %#x\n", bp, bp->bio_cmd);
1589 if (bp->bio_cmd == BIO_SPEEDUP) {
1595 (bp->bio_length == 0 || len < bp->bio_length)) {
1597 len += nbp->bio_length;
1601 if (bp->bio_length > 0) {
1602 if (bp->bio_length > len)
1603 bp->bio_resid = bp->bio_length - len;
1617 if (bp->bio_cmd == BIO_FLUSH && isc->
trim_ticks > 0)
1624 if (bp->bio_cmd == BIO_DELETE) {
1629#ifdef CAM_IOSCHED_DYNAMIC
1630 isc->trim_stats.in++;
1631 isc->trim_stats.queued++;
1634#ifdef CAM_IOSCHED_DYNAMIC
1635 else if (do_dynamic_iosched && (bp->bio_cmd != BIO_READ)) {
1637 bioq_disksort(&isc->write_queue, bp);
1639 bioq_insert_tail(&isc->write_queue, bp);
1641 printf(
"Qw : %p %#x\n", bp, bp->bio_cmd);
1642 if (bp->bio_cmd == BIO_WRITE) {
1643 isc->write_stats.in++;
1644 isc->write_stats.queued++;
1653#ifdef CAM_IOSCHED_DYNAMIC
1655 printf(
"Qr : %p %#x\n", bp, bp->bio_cmd);
1656 if (bp->bio_cmd == BIO_READ) {
1657 isc->read_stats.in++;
1658 isc->read_stats.queued++;
1659 }
else if (bp->bio_cmd == BIO_WRITE) {
1660 isc->write_stats.in++;
1661 isc->write_stats.queued++;
1685 isc->
flags &= ~CAM_IOSCHED_FLAG_TRIM_ACTIVE;
1694 union ccb *done_ccb)
1697#ifdef CAM_IOSCHED_DYNAMIC
1698 if (!do_dynamic_iosched)
1702 printf(
"done: %p %#x\n", bp, bp->bio_cmd);
1703 if (bp->bio_cmd == BIO_WRITE) {
1704 retval = cam_iosched_limiter_iodone(&isc->write_stats, bp);
1705 if ((bp->bio_flags & BIO_ERROR) != 0)
1706 isc->write_stats.errs++;
1707 isc->write_stats.out++;
1708 isc->write_stats.pending--;
1709 }
else if (bp->bio_cmd == BIO_READ) {
1710 retval = cam_iosched_limiter_iodone(&isc->read_stats, bp);
1711 if ((bp->bio_flags & BIO_ERROR) != 0)
1712 isc->read_stats.errs++;
1713 isc->read_stats.out++;
1714 isc->read_stats.pending--;
1715 }
else if (bp->bio_cmd == BIO_DELETE) {
1716 if ((bp->bio_flags & BIO_ERROR) != 0)
1717 isc->trim_stats.errs++;
1718 isc->trim_stats.out++;
1719 isc->trim_stats.pending--;
1720 }
else if (bp->bio_cmd != BIO_FLUSH) {
1722 printf(
"Completing command with bio_cmd == %#x\n", bp->bio_cmd);
1725 if ((bp->bio_flags & BIO_ERROR) == 0 && done_ccb != NULL &&
1727 sbintime_t sim_latency;
1731 cam_iosched_io_metric_update(isc, sim_latency,
1732 bp->bio_cmd, bp->bio_bcount);
1737 if (isc->latfcn && isc->max_lat != 0 && sim_latency > isc->max_lat)
1738 isc->latfcn(isc->latarg, sim_latency, bp);
1770 return isc->
flags & flags;
1776 isc->
flags |= flags;
1782 isc->
flags &= ~flags;
1785#ifdef CAM_IOSCHED_DYNAMIC
/*
 * Integer square root of a 64-bit value (floor), one bit at a time.
 * Used to derive the standard deviation from the moving variance.
 */
static uint64_t
isqrt64(uint64_t val)
{
	uint64_t res = 0;
	uint64_t bit = 1ULL << (sizeof(uint64_t) * NBBY - 2);

	/* Find the largest power of 4 no bigger than the value. */
	while (bit > val)
		bit >>= 2;

	/*
	 * Accumulate the answer one bit at a time (could be faster, but
	 * this is easy to understand and extend).
	 */
	while (bit != 0) {
		if (val >= res + bit) {
			val -= res + bit;
			res = (res >> 1) + bit;
		} else
			res >>= 1;
		bit >>= 2;
	}

	return res;
}
1831static sbintime_t latencies[LAT_BUCKETS - 1] = {
1854cam_iosched_update(
struct iop_stats *iop, sbintime_t sim_latency)
1856 sbintime_t y, deltasq, delta;
1863 for (i = 0; i < LAT_BUCKETS - 1; i++) {
1864 if (sim_latency < latencies[i]) {
1865 iop->latencies[i]++;
1869 if (i == LAT_BUCKETS - 1)
1870 iop->latencies[i]++;
1908 delta = (y - iop->ema);
1909 iop->ema = ((iop->ema << alpha_bits) + delta) >> alpha_bits;
1926 deltasq = delta * delta;
1927 iop->emvar = ((iop->emvar << (2 * alpha_bits)) +
1928 ((deltasq - iop->emvar) << alpha_bits) +
1930 >> (2 * alpha_bits);
1931 iop->sd = (sbintime_t)isqrt64((uint64_t)iop->emvar) << 12;
1936 sbintime_t sim_latency,
int cmd,
size_t size)
1941 cam_iosched_update(&isc->read_stats, sim_latency);
1944 cam_iosched_update(&isc->write_stats, sim_latency);
1947 cam_iosched_update(&isc->trim_stats, sim_latency);
1955static int biolen(
struct bio_queue_head *bq)
1960 TAILQ_FOREACH(bp, &bq->queue, bio_queue) {
1969DB_SHOW_COMMAND(iosched, cam_iosched_db_show)
1974 db_printf(
"Need addr\n");
1978 db_printf(
"pending_reads: %d\n", isc->read_stats.pending);
1979 db_printf(
"min_reads: %d\n", isc->read_stats.min);
1980 db_printf(
"max_reads: %d\n", isc->read_stats.max);
1981 db_printf(
"reads: %d\n", isc->read_stats.total);
1982 db_printf(
"in_reads: %d\n", isc->read_stats.in);
1983 db_printf(
"out_reads: %d\n", isc->read_stats.out);
1984 db_printf(
"queued_reads: %d\n", isc->read_stats.queued);
1985 db_printf(
"Read Q len %d\n", biolen(&isc->
bio_queue));
1986 db_printf(
"pending_writes: %d\n", isc->write_stats.pending);
1987 db_printf(
"min_writes: %d\n", isc->write_stats.min);
1988 db_printf(
"max_writes: %d\n", isc->write_stats.max);
1989 db_printf(
"writes: %d\n", isc->write_stats.total);
1990 db_printf(
"in_writes: %d\n", isc->write_stats.in);
1991 db_printf(
"out_writes: %d\n", isc->write_stats.out);
1992 db_printf(
"queued_writes: %d\n", isc->write_stats.queued);
1993 db_printf(
"Write Q len %d\n", biolen(&isc->write_queue));
1994 db_printf(
"pending_trims: %d\n", isc->trim_stats.pending);
1995 db_printf(
"min_trims: %d\n", isc->trim_stats.min);
1996 db_printf(
"max_trims: %d\n", isc->trim_stats.max);
1997 db_printf(
"trims: %d\n", isc->trim_stats.total);
1998 db_printf(
"in_trims: %d\n", isc->trim_stats.in);
1999 db_printf(
"out_trims: %d\n", isc->trim_stats.out);
2000 db_printf(
"queued_trims: %d\n", isc->trim_stats.queued);
2001 db_printf(
"Trim Q len %d\n", biolen(&isc->
trim_queue));
2002 db_printf(
"read_bias: %d\n", isc->read_bias);
2003 db_printf(
"current_read_bias: %d\n", isc->current_read_bias);
2004 db_printf(
"Trim active? %s\n",
SYSCTL_INT(_kern_cam_ada, OID_AUTO, retry_count, CTLFLAG_RWTUN, &ada_retry_count, 0, "Normal I/O retry count")
#define CAM_PRIORITY_NORMAL
static bool cam_iosched_has_io(struct cam_iosched_softc *isc)
struct bio * cam_iosched_next_trim(struct cam_iosched_softc *isc)
static bool cam_iosched_has_more_trim(struct cam_iosched_softc *isc)
void cam_iosched_clr_work_flags(struct cam_iosched_softc *isc, uint32_t flags)
void cam_iosched_fini(struct cam_iosched_softc *isc)
void cam_iosched_set_work_flags(struct cam_iosched_softc *isc, uint32_t flags)
static SYSCTL_NODE(_kern_cam, OID_AUTO, iosched, CTLFLAG_RD|CTLFLAG_MPSAFE, 0, "CAM I/O Scheduler parameters")
void cam_iosched_flush(struct cam_iosched_softc *isc, struct devstat *stp, int err)
void cam_iosched_set_trim_goal(struct cam_iosched_softc *isc, int goal)
struct bio * cam_iosched_get_trim(struct cam_iosched_softc *isc)
void cam_iosched_trim_done(struct cam_iosched_softc *isc)
#define CAM_IOSCHED_FLAG_CALLOUT_ACTIVE
int cam_iosched_init(struct cam_iosched_softc **iscp, struct cam_periph *periph)
void cam_iosched_put_back_trim(struct cam_iosched_softc *isc, struct bio *bp)
int cam_iosched_bio_complete(struct cam_iosched_softc *isc, struct bio *bp, union ccb *done_ccb)
static bool cam_iosched_has_work(struct cam_iosched_softc *isc)
#define cam_iosched_sort_queue(isc)
#define CAM_IOSCHED_FLAG_WORK_FLAGS
void cam_iosched_sysctl_init(struct cam_iosched_softc *isc, struct sysctl_ctx_list *ctx, struct sysctl_oid *node)
void cam_iosched_set_latfcn(struct cam_iosched_softc *isc, cam_iosched_latfcn_t fnp, void *argp)
void cam_iosched_set_trim_ticks(struct cam_iosched_softc *isc, int trim_ticks)
void cam_iosched_schedule(struct cam_iosched_softc *isc, struct cam_periph *periph)
void cam_iosched_set_sort_queue(struct cam_iosched_softc *isc, int val)
void cam_iosched_submit_trim(struct cam_iosched_softc *isc)
struct bio * cam_iosched_next_bio(struct cam_iosched_softc *isc)
#define CAM_IOSCHED_FLAG_TRIM_ACTIVE
int cam_iosched_has_work_flags(struct cam_iosched_softc *isc, uint32_t flags)
static MALLOC_DEFINE(M_CAMSCHED, "CAM I/O Scheduler", "CAM I/O Scheduler buffers")
static bool cam_iosched_has_flagged_work(struct cam_iosched_softc *isc)
void cam_iosched_queue_work(struct cam_iosched_softc *isc, struct bio *bp)
static sbintime_t cam_iosched_sbintime_t(uintptr_t delta)
void(* cam_iosched_latfcn_t)(void *, sbintime_t, struct bio *)
#define cam_periph_lock(periph)
#define cam_periph_unlock(periph)
static __inline struct mtx * cam_periph_mtx(struct cam_periph *periph)
void xpt_schedule(struct cam_periph *periph, u_int32_t new_priority)
struct bio_queue_head bio_queue
struct bio_queue_head trim_queue