分析块设备文件接口和 RAMDISK 驱动程序的结构(二)
static int rd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsignedlong arg)
%A {
%A unsigned int minor;
%A
%A if (!inode || !inode->i_rdev)
%A return -EINVAL;
%A
%A minor = MINOR(inode->i_rdev);
%A
%A switch (cmd) {
%A case BLKFLSBUF:
%A if (!capable(CAP_SYS_ADMIN))
%A return -EACCES;
%A /* special: we want to release the ramdisk memory,
%A it‘s not like with the other blockdevices where
%A this ioctl only flushes away the buffer cache. */
%A if ((atomic_read(&inode->i_bdev->bd_openers) > 2))
%A return -EBUSY;
%A destroy_buffers(inode->i_rdev);
%A rd_blocksizes[minor] = 0;
%A break;
%A
%A case BLKGETSIZE: /* Return device size */
%A if (!arg) return -EINVAL;
%A return put_user(rd_kbsize[minor] << 1, (long *) arg);
%A
%A case BLKROSET:
%A case BLKROGET:
%A case BLKSSZGET:
%A return blk_ioctl(inode->i_rdev, cmd, arg);
%A
%A default:
%A return -EINVAL;
%A };
%A
%A return 0;
%A }
%A static int rd_release(struct inode * inode, struct file * filp)
%A {
%A MOD_DEC_USE_COUNT;
%A return 0;
%A }
%A
初始化内存盘的文件接口
======================
%A static struct file_operations initrd_fops = {
%A read: initrd_read,
%A release: initrd_release,
%A };
%A static ssize_t initrd_read(struct file *file, char *buf,
%A size_t count, loff_t *ppos)
%A {
%A int left;
%A
%A left = initrd_end - initrd_start - *ppos;
%A if (count > left) count = left;
%A if (count == 0) return 0;
%A copy_to_user(buf, (char *)initrd_start + *ppos, count);
%A *ppos += count;
%A return count;
%A }
%A static int initrd_release(struct inode *inode,struct file *file)
%A {
%A extern void free_initrd_mem(unsigned long, unsigned long);
%A
%A lock_kernel();
%A if (!--initrd_users) {
%A blkdev_put(inode->i_bdev, BDEV_FILE);
%A iput(inode);
%A free_initrd_mem(initrd_start, initrd_end);
%A initrd_start = 0;
%A }
%A unlock_kernel();
%A return 0;
%A }
%A
块设备文件IO函数
================
; fs/block_dev.c
%A
%A struct block_device {
%A struct list_head bd_hash;
%A atomic_t bd_count;
%A /* struct address_space bd_data; */
%A dev_t bd_dev; /* not a kdev_t - it‘s a search key */
%A atomic_t bd_openers;
%A const struct block_device_operations *bd_op;
%A struct semaphore bd_sem; /* open/close mutex */
%A };
%A
%A struct file_operations def_blk_fops = {
%A open: blkdev_open,
%A release: blkdev_close,
%A llseek: block_llseek,
%A read: block_read,
%A write: block_write,
%A fsync: block_fsync,
%A ioctl: blkdev_ioctl,
%A };
%A
%A int blkdev_open(struct inode * inode, struct file * filp)
%A {
%A int ret = -ENXIO;
%A struct block_device *bdev = inode->i_bdev;
%A down(&bdev->bd_sem);
%A lock_kernel();
%A if (!bdev->bd_op)
%A bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
%A ; 在设备盘打开的时候安装其块设备函数表
%A if (bdev->bd_op) {
%A ret = 0;
%A if (bdev->bd_op->open)
%A ret = bdev->bd_op->open(inode,filp); 向驱动程序发送"open"消息
%A if (!ret)
%A atomic_inc(&bdev->bd_openers);
%A else if (!atomic_read(&bdev->bd_openers))
%A bdev->bd_op = NULL;
%A }
%A unlock_kernel();
%A up(&bdev->bd_sem);
%A return ret;
%A }
%A static int blkdev_close(struct inode * inode, struct file * filp)
%A {
%A return blkdev_put(inode->i_bdev, BDEV_FILE);
%A }
%A static loff_t block_llseek(struct file *file, loff_t offset, int origin)
%A {
%A long long retval;
%A kdev_t dev;
%A
%A switch (origin) {
%A case 2: 尾部定位
%A dev = file->f_dentry->d_inode->i_rdev;
%A if (blk_size[MAJOR(dev)])
%A offset += (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
%A /* else? return -EINVAL? */
%A break;
%A case 1: 相对定位
%A offset += file->f_pos;
%A }
%A retval = -EINVAL;
%A if (offset >= 0) {
%A if (offset != file->f_pos) {
%A file->f_pos = offset;
%A file->f_reada = 0;
%A file->f_version = ++event;
%A }
%A retval = offset;
%A }
%A return retval;
%A }
%A ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
%A {
%A struct inode * inode = filp->f_dentry->d_inode;
%A size_t block;
%A loff_t offset;
%A ssize_t blocksize;
%A ssize_t blocksize_bits, i;
%A size_t blocks, rblocks, left;
%A int bhrequest, uptodate;
%A struct buffer_head ** bhb, ** bhe;
%A struct buffer_head * buflist[NBUF];
%A struct buffer_head * bhreq[NBUF];
%A unsigned int chars;
%A loff_t size;
%A kdev_t dev;
%A ssize_t read;
%A
%A dev = inode->i_rdev;
%A blocksize = BLOCK_SIZE; 块设备的标准尺寸是1024字节
%A if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
%A blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
%A i = blocksize;
%A blocksize_bits = 0;
%A while (i != 1) {
%A blocksize_bits++;
%A i >>= 1;
%A }
%A
%A offset = *ppos;
%A if (blk_size[MAJOR(dev)])
%A size = (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;取设备盘容量
%A else
%A size = (loff_t) INT_MAX << BLOCK_SIZE_BITS;loff_t为64位整数,取设备盘的极限容量
%A
%A if (offset > size)
%A left = 0; left为剩余所要读的字节,offset为块设备文件指针
%A /* size - offset might not fit into left, so check explicitly. */
%A else if (size - offset > INT_MAX)
%A left = INT_MAX;
%A else
%A left = size - offset;
%A if (left > count)
%A left = count;
%A if (left <= 0)
%A return 0;
%A read = 0;
%A block = offset >> blocksize_bits; 求所要读的起始块号
%A offset &= blocksize-1; 求所要读的在块中的起点
%A size >>= blocksize_bits;
%A rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits;求所要读的块
%A bhb = bhe = buflist; buflist作为环形队列,bhb是对设备的读指针,bhe是对用户的写指针
%A if (filp->f_reada) {
%A if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9))
%A blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9);
%A if (rblocks > blocks)
%A blocks = rblocks;
%A
%A }
%A if (block + blocks > size) { 如果预读越过设备边界
%A blocks = size - block;
%A if (blocks == 0)
%A return 0;
%A }
%A
%A /* We do this in a two stage process. We first try to request
%A as many blocks as we can, then we wait for the first one to
%A complete, and then we try to wrap up as many as are actually
%A done. This routine is rather generic, in that it can be used
%A in a filesystem by substituting the appropriate function in
%A for getblk.
%A
%A This routine is optimized to make maximum use of the various
%A buffers and caches. */
%A
%A do {
%A bhrequest = 0;
%A uptodate = 1;
%A while (blocks) {
%A --blocks;
%A *bhb = getblk(dev, block++, blocksize); 取缓冲页
%A if (*bhb && !buffer_uptodate(*bhb)) { 该块不在缓冲中
%A uptodate = 0;
%A bhreq[bhrequest++] = *bhb; 请求块排队
%A }
%A
%A if (++bhb == &buflist[NBUF])
%A bhb = buflist; bhb是buflist上的循环指针,指向下一要读的块
%A
%A /* If the block we have on hand is uptodate, go ahead
%A and complete processing. */
%A if (uptodate) 如果当前块在缓冲中
%A break;
%A if (bhb == bhe) 如果请求块队列已满
%A break;
%A }
%A
%A /* Now request them all */
%A if (bhrequest) {
%A ll_rw_block(READ, bhrequest, bhreq); 读请求块
%A }
%A
%A do { /* Finish off all I/O that has actually completed */
%A if (*bhe) {
%A wait_on_buffer(*bhe); 等待块解锁,即读完成,对缓冲块无效
%A if (!buffer_uptodate(*bhe)) { /* read error? */
%A brelse(*bhe);
%A if (++bhe == &buflist[NBUF])
%A bhe = buflist;
%A left = 0;
%A break;
%A }
%A }
%A if (left < blocksize - offset)
%A chars = left; 如果读终止于块内,则向用户拷贝剩余字节
%A else
%A chars = blocksize - offset; 拷贝到块尾
%A *ppos += chars;
%A left -= chars;
%A read += chars; 已读的字节数
%A if (*bhe) {
%A copy_to_user(buf,offset+(*bhe)->b_data,chars);
%A brelse(*bhe);
%A buf += chars; 更新用户指针
%A } else {
%A while (chars-- > 0)
%A put_user(0,buf++); 向用户传送0字节
%A }
%A offset = 0; 开始从块边界拷贝
%A if (++bhe == &buflist[NBUF])
%A bhe = buflist; bhe是buflist上的循环指针,指向下一要向用户写的块
%A } while (left > 0 && bhe != bhb && (!*bhe ||!buffer_locked(*bhe)));
%A if (bhe == bhb && !blocks)
%A break;
%A } while (left > 0);
%A
%A /* Release the read-ahead blocks */
%A while (bhe != bhb) { 剩余未向用户写的块属于预读块
%A brelse(*bhe);
%A if (++bhe == &buflist[NBUF])
%A bhe = buflist;
%A };
%A if (!read)
%A return -EIO;
%A filp->f_reada = 1;
%A return read;
%A }
%A ssize_t block_write(struct file * filp, const char * buf,
%A size_t count, loff_t *ppos)
%A {
%A struct inode * inode = filp->f_dentry->d_inode;
%A ssize_t blocksize, blocksize_bits, i, buffercount, write_error;
%A ssize_t block, blocks;
%A loff_t offset;
%A ssize_t chars;
%A ssize_t written;
%A struct buffer_head * bhlist[NBUF];
%A size_t size;
%A kdev_t dev = inode->i_rdev;
%A struct buffer_head * bh, *bufferlist[NBUF];
%A register char * p;
%A
%A if (is_read_only(dev))
%A return -EPERM;
%A
%A written = write_error = buffercount = 0;
%A blocksize = BLOCK_SIZE;
%A if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
%A blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
%A
%A i = blocksize;
%A blocksize_bits = 0;
%A while(i != 1) {
%A blocksize_bits++;
%A i >>= 1;
%A }
%A
%A block = *ppos >> blocksize_bits;
%A offset = *ppos & (blocksize-1);
%A
%A if (blk_size[MAJOR(dev)])
%A size = ((loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS) >>blocksize_bits;
%A else
%A size = INT_MAX;
%A while (count>0) {
%A if (block >= size)
%A return written ? written : -ENOSPC;
%A chars = blocksize - offset;
%A if (chars > count)
%A chars=count;
%A
%A #if 0
%A /* get the buffer head */
%A {
%A struct buffer_head * (*fn)(kdev_t, int, int) = getblk;
%A if (chars != blocksize)
%A fn = bread;
%A bh = fn(dev, block, blocksize);
%A if (!bh)
%A return written ? written : -EIO;
%A if (!buffer_uptodate(bh))
%A wait_on_buffer(bh);
%A }
%A #else
%A bh = getblk(dev, block, blocksize);
%A if (!bh)
%A return written ? written : -EIO;
%A
%A if (!buffer_uptodate(bh))
%A { 如果要写的块不在块缓冲之中或者要读的块尚未读取
%A if (chars == blocksize) 写一整块
%A wait_on_buffer(bh); 对新缓冲块来说是空操作
%A else
%A { 要写的位置起始于块的中间
%A bhlist[0] = bh;
%A if (!filp->f_reada || !read_ahead[MAJOR(dev)]) {
%A /* We do this to force the read of a single buffer */
%A blocks = 1; 当前位置上读取一块
%A } else { 从当前位置读取多个块
%A /* Read-ahead before write */
%A blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2;
%A if (block + blocks > size) blocks = size - block;
%A if (blocks > NBUF) blocks=NBUF;
%A if (!blocks) blocks = 1;
%A for(i=1; i<blocks; i++)
%A {
%A bhlist[ i ] = getblk (dev, block+i, blocksize);
%A if (!bhlist[ i ])
%A {
%A while(i >= 0) brelse(bhlist[i--]);
%A return written ? written : -EIO;
%A }
%A }
%A }
%A ll_rw_block(READ, blocks, bhlist); 提交写任务
%A for(i=1; i<blocks; i++) brelse(bhlist[ i ] );
%A wait_on_buffer(bh); 等待读写完成
%A if (!buffer_uptodate(bh)) {
%A brelse(bh); 如果有一个块读写错误,则返回错误
%A return written ? written : -EIO;
%A }
%A };
%A };
%A #endif
%A block++;
%A p = offset + bh->b_data;
%A offset = 0;
%A *ppos += chars;
%A written += chars;
%A count -= chars;
%A copy_from_user(p,buf,chars); 从用户拷贝数据到缓冲块
%A p += chars;
%A buf += chars;
%A mark_buffer_uptodate(bh, 1);
%A mark_buffer_dirty(bh);
%A if (filp->f_flags & O_SYNC)
%A bufferlist[buffercount++] = bh; 将产生的脏块排队
%A else
%A brelse(bh);
%A if (buffercount == NBUF){ 当脏块数超过缓冲区数
%A ll_rw_block(WRITE, buffercount, bufferlist); 写脏块
%A for(i=0; i<buffercount; i++){
%A wait_on_buffer(bufferlist[ i ]); 等待完成
%A if (!buffer_uptodate(bufferlist[ i ]))
%A
%A write_error=1;
%A brelse(bufferlist[ i ]);
%A }
%A buffercount=0;
%A }
%A balance_dirty(dev);
%A if (write_error)
%A break;
%A }
%A if ( buffercount ){ 写剩余的脏块
%A ll_rw_block(WRITE, buffercount, bufferlist);
%A for(i=0; i<buffercount; i++){
%A wait_on_buffer(bufferlist[ i ]);
%A if (!buffer_uptodate(bufferlist[ i ]))
%A write_error=1;
%A brelse(bufferlist[ i ]);
%A }
%A }
%A filp->f_reada = 1;
%A if(write_error)
%A return -EIO;
%A return written;
%A }
%A static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
%A {
%A return fsync_dev(dentry->d_inode->i_rdev); 刷新与块设备相关联的脏块
%A }
%A static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
%A unsigned long arg)
%A {
%A if (inode->i_bdev->bd_op->ioctl)
%A return inode->i_bdev->bd_op->ioctl(inode, file, cmd, arg);
%A return -EINVAL;
%A }
%A const struct block_device_operations * get_blkfops(unsigned int major)
%A {
%A const struct block_device_operations *ret = NULL;
%A
%A /* major 0 is used for non-device mounts */
%A if (major && major < MAX_BLKDEV) {
%A #ifdef CONFIG_KMOD
%A if (!blkdevs[major].bdops) {
%A char name[20];
%A sprintf(name, "block-major-%d", major);
%A request_module(name);
%A }
%A #endif
%A ret = blkdevs[major].bdops;
%A }
%A return ret;
%A }
%A int blkdev_put(struct block_device *bdev, int kind)
%A {
%A int ret = 0;
%A kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
%A down(&bdev->bd_sem);
%A /* syncing will go here */
%A lock_kernel();
%A if (kind == BDEV_FILE || kind == BDEV_FS)
%A fsync_dev(rdev);
%A if (atomic_dec_and_test(&bdev->bd_openers)) {
%A /* invalidating buffers will go here */
%A invalidate_buffers(rdev);
%A }
%A if (bdev->bd_op->release) {
%A struct inode * fake_inode = get_empty_inode();
%A ret = -ENOMEM;
%A if (fake_inode) {
%A fake_inode->i_rdev = rdev;
%A ret = bdev->bd_op->release(fake_inode, NULL);
%A iput(fake_inode);
%A }
%A }
%A if (!atomic_read(&bdev->bd_openers))
%A bdev->bd_op = NULL; /* we can‘t rely on driver being */
%A /* kind to stay around. */
%A unlock_kernel();
%A up(&bdev->bd_sem);
%A return ret;
%A }
块设备文件inode的初始化
=======================
; fs/devices.c
%A void init_special_inode(struct inode *inode, umode_t mode, int rdev)
%A {
%A inode->i_mode = mode;
%A if (S_ISCHR(mode)) {
%A inode->i_fop = &def_chr_fops;
%A inode->i_rdev = to_kdev_t(rdev);
%A } else if (S_ISBLK(mode)) {
%A inode->i_fop = &def_blk_fops;
%A inode->i_rdev = to_kdev_t(rdev);
%A inode->i_bdev = bdget(rdev);
%A } else if (S_ISFIFO(mode))
%A inode->i_fop = &def_fifo_fops;
%A else if (S_ISSOCK(mode))
%A inode->i_fop = &bad_sock_fops;
%A else
%A printk(KERN_DEBUG "init_special_inode: bogus imode (%o)\n", mode);
%A }
%A struct block_device *bdget(dev_t dev)
%A {
%A struct list_head * head = bdev_hashtable + hash(dev);
%A struct block_device *bdev, *new_bdev;
%A spin_lock(&bdev_lock);
%A bdev = bdfind(dev, head);
%A spin_unlock(&bdev_lock);
%A if (bdev)
%A return bdev;
%A new_bdev = alloc_bdev(); 使用kmem_cache_alloc()分配内存
%A if (!new_bdev)
%A return NULL;
%A atomic_set(&new_bdev->bd_count,1);
%A new_bdev->bd_dev = dev;
%A new_bdev->bd_op = NULL;
%A spin_lock(&bdev_lock);
%A bdev = bdfind(dev, head);
%A if (!bdev) {
%A list_add(&new_bdev->bd_hash, head);
%A spin_unlock(&bdev_lock);
%A return new_bdev;
%A }
%A spin_unlock(&bdev_lock);
%A destroy_bdev(new_bdev); 使用kmem_cache_free()释放内存
%A return bdev;
%A }
%A
%A%A
%A
*博客内容为网友个人发布,仅代表博主个人观点,如有侵权请联系工作人员删除。