/*
 * Pressure Stall Information (PSI) type definitions: per-cpu task-state
 * counters, pressure-state time buckets, and the aggregation/trigger
 * bookkeeping shared by the scheduler-side writers and the aggregator.
 */
#ifndef _LINUX_PSI_TYPES_H
#define _LINUX_PSI_TYPES_H

#include <linux/kthread.h>
#include <linux/seqlock.h>
#include <linux/types.h>
#include <linux/kref.h>
#include <linux/wait.h>

#ifdef CONFIG_PSI

/* Tracked task states */
enum psi_task_count {
	NR_IOWAIT,
	NR_MEMSTALL,
	NR_RUNNING,
	/*
	 * This can't have values other than 0 or 1 and could be
	 * implemented as a bit flag. But for now we still have room
	 * in the first cacheline of psi_group_cpu, and this way we
	 * don't have to special case any state tracking for it.
	 */
	NR_ONCPU,
	/*
	 * For IO and CPU stalls the presence of running/oncpu tasks
	 * in the domain means a partial rather than a full stall.
	 * For memory it's not so simple because of page reclaimers:
	 * they are running/oncpu while representing a stall. To tell
	 * whether a domain has productivity left or not, we need to
	 * distinguish between regular running (i.e. productive)
	 * threads and memstall ones.
	 */
	NR_MEMSTALL_RUNNING,
	NR_PSI_TASK_COUNTS = 5,
};

/* Task state bitmasks */
#define TSK_IOWAIT	(1 << NR_IOWAIT)
#define TSK_MEMSTALL	(1 << NR_MEMSTALL)
#define TSK_RUNNING	(1 << NR_RUNNING)
#define TSK_ONCPU	(1 << NR_ONCPU)
#define TSK_MEMSTALL_RUNNING	(1 << NR_MEMSTALL_RUNNING)

/* Resources that workloads could be stalled on */
enum psi_res {
	PSI_IO,
	PSI_MEM,
	PSI_CPU,
	NR_PSI_RESOURCES = 3,
};

/*
 * Pressure states for each resource:
 *
 * SOME: Stalled tasks & working tasks
 * FULL: Stalled tasks & no working tasks
 */
enum psi_states {
	PSI_IO_SOME,
	PSI_IO_FULL,
	PSI_MEM_SOME,
	PSI_MEM_FULL,
	PSI_CPU_SOME,
	PSI_CPU_FULL,
	/* Only per-CPU, to weigh the CPU in the global average: */
	PSI_NONIDLE,
	NR_PSI_STATES = 7,
};

/* Two independent consumers of the per-cpu time buckets: periodic
 * averaging vs. on-demand trigger polling. */
enum psi_aggregators {
	PSI_AVGS = 0,
	PSI_POLL,
	NR_PSI_AGGREGATORS,
};

struct psi_group_cpu {
	/* 1st cacheline updated by the scheduler */

	/* Aggregator needs to know of concurrent changes */
	seqcount_t seq ____cacheline_aligned_in_smp;

	/* States of the tasks belonging to this group */
	unsigned int tasks[NR_PSI_TASK_COUNTS];

	/* Aggregate pressure state derived from the tasks */
	u32 state_mask;

	/* Period time sampling buckets for each state of interest (ns) */
	u32 times[NR_PSI_STATES];

	/* Time of last task change in this group (rq_clock) */
	u64 state_start;

	/* 2nd cacheline updated by the aggregator */

	/* Delta detection against the sampling buckets */
	u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
			____cacheline_aligned_in_smp;
};

/* PSI growth tracking window */
struct psi_window {
	/* Window size in ns */
	u64 size;

	/* Start time of the current window in ns */
	u64 start_time;

	/* Value at the start of the window */
	u64 start_value;

	/* Value growth in the previous window */
	u64 prev_growth;
};

struct psi_trigger {
	/* PSI state being monitored by the trigger */
	enum psi_states state;

	/* User-specified threshold in ns */
	u64 threshold;

	/* List node inside triggers list */
	struct list_head node;

	/* Backpointer needed during trigger destruction */
	struct psi_group *group;

	/* Wait queue for polling */
	wait_queue_head_t event_wait;

	/* Pending event flag */
	int event;

	/* Tracking window */
	struct psi_window win;

	/*
	 * Time last event was generated. Used for rate-limiting
	 * events to one per window
	 */
	u64 last_event_time;
};

struct psi_group {
	/* Protects data used by the aggregator */
	struct mutex avgs_lock;

	/* Per-cpu task state & time tracking */
	struct psi_group_cpu __percpu *pcpu;

	/* Running pressure averages */
	u64 avg_total[NR_PSI_STATES - 1];
	u64 avg_last_update;
	u64 avg_next_update;

	/* Aggregator work control */
	struct delayed_work avgs_work;

	/* Total stall times and sampled pressure averages */
	u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
	unsigned long avg[NR_PSI_STATES - 1][3];

	/* Monitor work control */
	struct task_struct __rcu *poll_task;
	struct timer_list poll_timer;
	wait_queue_head_t poll_wait;
	atomic_t poll_wakeup;

	/* Protects data used by the monitor */
	struct mutex trigger_lock;

	/* Configured polling triggers */
	struct list_head triggers;
	u32 nr_triggers[NR_PSI_STATES - 1];
	u32 poll_states;
	u64 poll_min_period;

	/* Total stall times at the start of monitor activation */
	u64 polling_total[NR_PSI_STATES - 1];
	u64 polling_next_update;
	u64 polling_until;
};

#else /* CONFIG_PSI */

/* Stub so code can embed a psi_group unconditionally when PSI is off */
struct psi_group { };

#endif /* CONFIG_PSI */

#endif /* _LINUX_PSI_TYPES_H */