/* $OpenBSD: buf.h,v 1.112 2019/11/29 01:04:08 beck Exp $ */
/* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)buf.h 8.7 (Berkeley) 1/21/94
*/
#ifndef _SYS_BUF_H_
#define _SYS_BUF_H_
#include <sys/queue.h>
#include <sys/tree.h>
#include <sys/mutex.h>
#include <uvm/uvm_extern.h>
/*
 * Sentinel struct buf pointer: a fixed integer constant cast to
 * struct buf *.  NOTE(review): presumably marks a buffer that is on
 * no list; confirm against the users of NOLIST.
 */
#define NOLIST ((struct buf *)0x87654321)
/* Forward declarations so pointers to these types can be used below. */
struct buf;
struct vnode;
/* Declares struct bufhead, a <sys/queue.h> list head of struct buf. */
LIST_HEAD(bufhead, buf);
/*
* Declare struct workhead (the type of b_dep in struct buf) here
* to avoid including <ufs/ffs/softdep.h>
*/
LIST_HEAD(workhead, worklist);
/*
* Buffer queues
*/
/*
 * NOTE(review): presumably a tuning constant of the nscan discipline;
 * it is not used anywhere in this header -- confirm in the bufq code.
 */
#define BUFQ_NSCAN_N 128
/*
 * Queue discipline identifiers.  NOTE(review): presumably the values
 * accepted as the int argument of bufq_init()/bufq_switch() and stored
 * in bufq_type -- confirm.
 */
#define BUFQ_FIFO 0
#define BUFQ_NSCAN 1
/* Discipline used by default: nscan. */
#define BUFQ_DEFAULT BUFQ_NSCAN
/* Number of discipline identifiers defined above (0 and 1). */
#define BUFQ_HOWMANY 2
/*
* Write limits for bufq - defines high and low water marks for how
* many kva slots are allowed to be consumed to parallelize writes from
* the buffer cache from any individual bufq.
*/
#define BUFQ_HI 128
#define BUFQ_LOW 64
/* Only forward-declared here; struct bufq holds a const pointer to it. */
struct bufq_impl;
/*
 * A buffer queue.  The field notes below marked "presumably" are read
 * from the names only and should be confirmed against the bufq
 * implementation, which is not part of this header.
 */
struct bufq {
SLIST_ENTRY(bufq) bufq_entries; /* Linkage on a singly-linked list of bufqs. */
struct mutex bufq_mtx; /* Presumably protects the fields of this bufq. */
void *bufq_data; /* Opaque pointer; presumably per-discipline state. */
u_int bufq_outstanding; /* Presumably count of bufs handed out, not yet done. */
u_int bufq_hi; /* High water mark (see BUFQ_HI). */
u_int bufq_low; /* Low water mark (see BUFQ_LOW). */
int bufq_waiting; /* Presumably number of/flag for sleepers on this bufq. */
int bufq_stop; /* Presumably set while the queue is quiesced. */
int bufq_type; /* Presumably one of BUFQ_FIFO / BUFQ_NSCAN. */
const struct bufq_impl *bufq_impl; /* Implementation backing this queue. */
};
/*
 * bufq interface.  Only the declarations are visible in this header.
 * NOTE(review): the int argument of bufq_init()/bufq_switch() is
 * presumably a BUFQ_* discipline identifier, and the int results are
 * presumably 0 on success / an errno otherwise -- confirm.
 */
int bufq_init(struct bufq *, int);
int bufq_switch(struct bufq *, int);
void bufq_destroy(struct bufq *);
/* Per-queue operations on individual buffers. */
void bufq_queue(struct bufq *, struct buf *);
struct buf *bufq_dequeue(struct bufq *);
void bufq_requeue(struct bufq *, struct buf *);
int bufq_peek(struct bufq *);
void bufq_drain(struct bufq *);
void bufq_wait(struct bufq *);
void bufq_done(struct bufq *, struct buf *);
/* These two take no queue argument. */
void bufq_quiesce(void);
void bufq_restart(void);
/* fifo: queue head type and the per-buffer linkage for it. */
SIMPLEQ_HEAD(bufq_fifo_head, buf);
struct bufq_fifo {
SIMPLEQ_ENTRY(buf) bqf_entries;
};
/*
 * nscan: queue head type and the per-buffer linkage for it.  The entry
 * member carries the same name (bqf_entries) as the fifo variant.
 */
SIMPLEQ_HEAD(bufq_nscan_head, buf);
struct bufq_nscan {
SIMPLEQ_ENTRY(buf) bqf_entries;
};
/*
 * bufq link in struct buf (member b_bufq).  Being a union, the fifo
 * and nscan linkage share the same storage.
 */
union bufq_data {
struct bufq_fifo bufq_data_fifo;
struct bufq_nscan bufq_data_nscan;
};
/*
* These are currently used only by the soft dependency code, hence
* are stored once in a global variable. If other subsystems wanted
* to use these hooks, a pointer to a set of bio_ops could be added
* to each buffer.
*
* Each hook may be left unset: the inline wrappers in the _KERNEL
* section of this header (buf_start() and friends) test the pointer
* before calling through it.
*/
extern struct bio_ops {
void (*io_start)(struct buf *); /* called by buf_start() */
void (*io_complete)(struct buf *); /* called by buf_complete() */
void (*io_deallocate)(struct buf *); /* called by buf_deallocate() */
void (*io_movedeps)(struct buf *, struct buf *); /* called by buf_movedeps() */
int (*io_countdeps)(struct buf *, int, int); /* called by buf_countdeps() */
} bioops;
/*
 * The buffer header describes an I/O operation in the kernel.
 *
 * A buffer carries several independent list/tree linkages (vnode tree,
 * global list, vnode list, free list, va LRU, bufq), its state flags
 * (B_* below), the data pointer and sizes, and the block addresses.
 */
struct buf {
RBT_ENTRY(buf) b_rbbufs; /* vnode "hash" tree */
LIST_ENTRY(buf) b_list; /* All allocated buffers. */
LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */
TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */
int cache; /* which cache are we in */
struct proc *b_proc; /* Associated proc; NULL if kernel. */
volatile long b_flags; /* B_* flags. */
long b_bufsize; /* Allocated buffer size. */
long b_bcount; /* Valid bytes in buffer. (Also reachable as b_active.) */
size_t b_resid; /* Remaining I/O. */
int b_error; /* Errno value. */
dev_t b_dev; /* Device associated with buffer. */
caddr_t b_data; /* associated data */
void *b_saveaddr; /* Original b_data for physio. */
TAILQ_ENTRY(buf) b_valist; /* LRU of va to reuse. */
union bufq_data b_bufq; /* Linkage used while queued on a bufq. */
struct bufq *b_bq; /* What bufq this buf is on */
struct uvm_object *b_pobj; /* NOTE(review): presumably the object currently holding the pages -- confirm. */
struct uvm_object b_uobj; /* Object containing the pages */
off_t b_poffs; /* Offset within object */
daddr_t b_lblkno; /* Logical block number. */
daddr_t b_blkno; /* Underlying physical block number. */
/* Function to call upon completion.
* Will be called at splbio().
* The B_CALL flag asks biodone to call it. */
void (*b_iodone)(struct buf *);
struct vnode *b_vp; /* Device vnode. */
int b_dirtyoff; /* Offset in buffer of dirty region. */
int b_dirtyend; /* Offset of end of dirty region. */
int b_validoff; /* Offset in buffer of valid region. */
int b_validend; /* Offset of end of valid region. */
struct workhead b_dep; /* List of filesystem dependencies. */
};
/* Declares struct bufqueue, a <sys/queue.h> tail queue of struct buf. */
TAILQ_HEAD(bufqueue, buf);
/*
 * One buffer cache: three page counters and three buffer queues
 * (hot, cold, warm).  NOTE(review): the counters presumably track the
 * pages held on the matching queues / by the cache as a whole --
 * confirm against the code that updates them.
 */
struct bufcache {
int64_t hotbufpages;
int64_t warmbufpages;
int64_t cachepages;
struct bufqueue hotqueue;
struct bufqueue coldqueue;
struct bufqueue warmqueue;
};
/*
 * Device driver compatibility definitions.
 * b_active is an alias: "bp->b_active" expands to "bp->b_bcount", so
 * the two names refer to the same storage in struct buf.
 */
#define b_active b_bcount /* Driver queue head: drive active. */
/*
* These flags are kept in b_flags.
*
* B_WRITE is 0, so it cannot be tested with a bitwise AND; a write is
* indicated by B_READ being clear.
*/
#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */
#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
#define B_NEEDCOMMIT 0x00000002 /* Needs committing to stable storage */
#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
#define B_BAD 0x00000008 /* Bad block revectoring in progress. */
#define B_BUSY 0x00000010 /* I/O in progress. */
#define B_CACHE 0x00000020 /* Bread found us in the cache. */
#define B_CALL 0x00000040 /* Call b_iodone from biodone. */
#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
#define B_DONE 0x00000100 /* I/O completed. */
#define B_EINTR 0x00000200 /* I/O was interrupted */
#define B_ERROR 0x00000400 /* I/O error occurred. */
#define B_INVAL 0x00000800 /* Does not contain valid info. */
#define B_NOCACHE 0x00001000 /* Do not cache block after use. */
#define B_PHYS 0x00002000 /* I/O to user memory. */
#define B_RAW 0x00004000 /* Set by physio for raw transfers. */
#define B_READ 0x00008000 /* Read buffer. */
#define B_WANTED 0x00010000 /* Process wants this buffer. */
#define B_WRITEINPROG 0x00020000 /* Write in progress. */
#define B_XXX 0x00040000 /* Debugging flag. */
#define B_DEFERRED 0x00080000 /* Skipped over for cleaning */
#define B_SCANNED 0x00100000 /* Block already pushed during sync */
#define B_PDAEMON 0x00200000 /* I/O started by pagedaemon */
#define B_RELEASED 0x00400000 /* free this buffer after its kvm */
#define B_WARM 0x00800000 /* buffer is or has been on the warm queue */
#define B_COLD 0x01000000 /* buffer is on the cold queue */
#define B_BC 0x02000000 /* buffer is managed by the cache */
#define B_DMA 0x04000000 /* buffer is DMA reachable */
/*
 * Bit-name table for b_flags, laid out like a kernel printf "%b"
 * description: a leading base byte (octal 20 = 16, i.e. hex) followed
 * by entries made of a 1-based bit number (as an octal escape) and the
 * name of that bit.  The entries line up with the flag values above.
 * NOTE(review): bit 026 (B_PDAEMON) is labelled "DAEMON" and bit 023
 * (B_XXX) is labelled "XXX(FORMAT)"; confirm whether these labels are
 * intentional before changing them, since they appear in output.
 */
#define B_BITS "\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
"\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \
"\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \
"\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \
"\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA"
/*
 * Zero out the buffer's data area.
 *
 * Clears the first b_bcount bytes at b_data and resets b_resid to 0.
 * The body is wrapped in do { } while (0) so that "clrbuf(bp);" forms
 * exactly one statement and can be used as the unbraced body of an
 * if/else.  Note that bp is evaluated more than once, so it must not
 * have side effects.
 */
#define clrbuf(bp) do {							\
	bzero((bp)->b_data, (bp)->b_bcount);				\
	(bp)->b_resid = 0;						\
} while (0)
/*
 * Flags to low-level allocation routines.
 * These are a separate flag space from the b_flags bits above: the
 * values 0x01 and 0x02 coincide numerically with B_AGE and
 * B_NEEDCOMMIT, so they must not be mixed into b_flags.
 */
#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */
#define B_SYNC 0x02 /* Do all allocations synchronously. */
/*
 * Clustering state: block numbers and lengths describing the most
 * recent reads, writes and allocations.  A pointer to one of these is
 * taken by cluster_write().  NOTE(review): which object owns each
 * instance is not visible in this header -- confirm at the users.
 */
struct cluster_info {
daddr_t ci_lastr; /* last read (read-ahead) */
daddr_t ci_lastw; /* last write (write cluster) */
daddr_t ci_cstart; /* start block of cluster */
daddr_t ci_lasta; /* last allocation */
int ci_clen; /* length of current cluster */
int ci_ralen; /* Read-ahead length */
daddr_t ci_maxra; /* last readahead block */
};
#ifdef _KERNEL
__BEGIN_DECLS
/* Kva slots (of size MAXPHYS) reserved for syncer and cleaner. */
#define RESERVE_SLOTS 4
/*
 * Buffer cache pages reserved for syncer and cleaner: RESERVE_SLOTS
 * slots of MAXPHYS bytes each, expressed in PAGE_SIZE pages.
 */
#define RESERVE_PAGES (RESERVE_SLOTS * MAXPHYS / PAGE_SIZE)
/* Minimum size of the buffer cache, in pages (twice the reserve). */
#define BCACHE_MIN (RESERVE_PAGES * 2)
/*
 * Buffer pages that are not clean: total buffer pages minus clean
 * pages.  bcstats is not declared in the visible part of this header,
 * so users of this macro need its declaration in scope.
 */
#define UNCLEAN_PAGES (bcstats.numbufpages - bcstats.numcleanpages)
/* Globals defined elsewhere in the kernel. */
extern struct proc *cleanerproc;
extern long bufpages; /* Max number of pages for buffers' data */
extern struct pool bufpool;
extern struct bufhead bufhead;
/*
 * Buffer read/write/release interface.  Only the declarations are
 * visible in this header; see the implementation for the exact
 * contracts.  NOTE(review): the int results are presumably 0 or an
 * errno value -- confirm.
 */
void bawrite(struct buf *);
void bdwrite(struct buf *);
void biodone(struct buf *);
int biowait(struct buf *);
/* bread()/breadn() hand the buffer back through the struct buf ** argument. */
int bread(struct vnode *, daddr_t, int, struct buf **);
int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
struct buf **);
void brelse(struct buf *);
/* bremfree is kept as an alias for bufcache_take(), declared below. */
#define bremfree bufcache_take
void bufinit(void);
void buf_dirty(struct buf *);
void buf_undirty(struct buf *);
void buf_adjcnt(struct buf *, long);
int bwrite(struct buf *);
struct buf *getblk(struct vnode *, daddr_t, int, int, uint64_t);
struct buf *geteblk(size_t);
struct buf *incore(struct vnode *, daddr_t);
/*
* bufcache functions
*
* bufcache_take() is also reachable under the name bremfree (see the
* #define above).  NOTE(review): the meaning of the two int arguments
* of bufcache_getcleanbuf() and of the buf_flip_high() result is not
* visible here -- confirm in the implementation.
*/
void bufcache_take(struct buf *);
void bufcache_release(struct buf *);
int buf_flip_high(struct buf *);
void buf_flip_dma(struct buf *);
struct buf *bufcache_getcleanbuf(int, int);
struct buf *bufcache_getanycleanbuf(void);
struct buf *bufcache_getdirtybuf(void);
/*
* Buffer memory and kvm mapping management.
*
* buf_acquire sets the B_BUSY flag and ensures that the buffer is
* mapped in the kvm.
* buf_release clears the B_BUSY flag and allows the buffer to become
* unmapped.
*
* NOTE(review): this comment also used to describe buf_kvm_init
* ("initializes the kvm handling for buffers") and buf_unmap ("for
* internal use only, unmaps the buffer from kvm").  Neither name is
* declared in this header; buf_mem_init() below may be the current
* name of the former -- confirm against the implementation.
*/
void buf_mem_init(vsize_t);
void buf_acquire(struct buf *);
void buf_acquire_unmapped(struct buf *);
void buf_acquire_nomap(struct buf *);
void buf_map(struct buf *);
void buf_release(struct buf *);
int buf_dealloc_mem(struct buf *);
void buf_fix_mapping(struct buf *, vsize_t);
void buf_alloc_pages(struct buf *, vsize_t);
void buf_free_pages(struct buf *);
/*
 * Raw I/O entry points, buffer <-> vnode association helpers and the
 * buffer daemon entry point.  Only the declarations are visible here.
 */
void	minphys(struct buf *bp);
/*
 * physio() is handed the strategy routine, the device, the I/O flags,
 * a minphys-style callback and the uio describing the transfer.
 */
int	physio(void (*strategy)(struct buf *), dev_t dev, int flags,
	    void (*minphys)(struct buf *), struct uio *uio);
void	brelvp(struct buf *);
void	reassignbuf(struct buf *);
void	bgetvp(struct vnode *, struct buf *);
/* Declared once; a second, identical prototype used to follow buf_daemon(). */
void	buf_replacevnode(struct buf *, struct vnode *);
void	buf_daemon(void *);
int	bread_cluster(struct vnode *, daddr_t, int, struct buf **);
#ifdef DEBUG
/* Only declared when the kernel is built with DEBUG. */
void	buf_print(struct buf *);
#endif
/*
 * Run the global io_start hook on bp.
 * Does nothing when no hook is installed in bioops.
 */
static __inline void
buf_start(struct buf *bp)
{
	if (!bioops.io_start)
		return;
	bioops.io_start(bp);
}
/*
 * Run the global io_complete hook on bp.
 * Does nothing when no hook is installed in bioops.
 */
static __inline void
buf_complete(struct buf *bp)
{
	if (!bioops.io_complete)
		return;
	bioops.io_complete(bp);
}
/*
 * Run the global io_deallocate hook on bp.
 * Does nothing when no hook is installed in bioops.
 */
static __inline void
buf_deallocate(struct buf *bp)
{
	if (!bioops.io_deallocate)
		return;
	bioops.io_deallocate(bp);
}
/*
 * Pass bp and bp2, in that order, to the global io_movedeps hook.
 * Does nothing when no hook is installed in bioops.
 */
static __inline void
buf_movedeps(struct buf *bp, struct buf *bp2)
{
	if (!bioops.io_movedeps)
		return;
	bioops.io_movedeps(bp, bp2);
}
/*
 * Forward bp, i and islocked to the global io_countdeps hook and hand
 * back its result.  Returns 0 when no hook is installed in bioops.
 */
static __inline int
buf_countdeps(struct buf *bp, int i, int islocked)
{
	if (!bioops.io_countdeps)
		return (0);
	return (bioops.io_countdeps(bp, i, islocked));
}
/*
 * Clustered write entry point; takes the buffer and the cluster_info
 * state to use.  NOTE(review): the u_quad_t argument is presumably a
 * file size -- confirm against the implementation.
 */
void cluster_write(struct buf *, struct cluster_info *, u_quad_t);
__END_DECLS
#endif /* _KERNEL */
#endif /* !_SYS_BUF_H_ */