新闻  |   论坛  |   博客  |   在线研讨会
分析块设备文件接口和RAMDISK驱动程序的结构二
yanqin | 2009-04-16 19:18:26    阅读:2704   发布文章

static int rd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsignedlong arg)
%A {
%A         unsigned int minor;
%A
%A         if (!inode || !inode->i_rdev)         
%A                 return -EINVAL;
%A
%A         minor = MINOR(inode->i_rdev);
%A
%A         switch (cmd) {
%A                 case BLKFLSBUF:
%A                         if (!capable(CAP_SYS_ADMIN))
%A                                 return -EACCES;
%A                         /* special: we want to release the ramdisk memory,
%A                            it‘s not like with the other blockdevices where
%A                            this ioctl only flushes away the buffer cache. */
%A                         if ((atomic_read(&inode->i_bdev->bd_openers) > 2))
%A                                 return -EBUSY;
%A                         destroy_buffers(inode->i_rdev);
%A                         rd_blocksizes[minor] = 0;
%A                         break;
%A
%A                  case BLKGETSIZE:   /* Return device size */
%A                         if (!arg)  return -EINVAL;
%A                         return put_user(rd_kbsize[minor] << 1, (long *) arg);
%A
%A                 case BLKROSET:
%A                 case BLKROGET:
%A                 case BLKSSZGET:
%A                         return blk_ioctl(inode->i_rdev, cmd, arg);
%A
%A                 default:
%A                         return -EINVAL;
%A         };
%A
%A         return 0;
%A }
%A static int rd_release(struct inode * inode, struct file * filp)
%A {
%A         MOD_DEC_USE_COUNT;
%A         return 0;
%A }
%A
初始化内存盘的文件接口
======================
%A static struct file_operations initrd_fops = {
%A         read:                initrd_read,
%A         release:        initrd_release,
%A };
%A static ssize_t initrd_read(struct file *file, char *buf,
%A                            size_t count, loff_t *ppos)
%A {
%A         int left;
%A
%A         left = initrd_end - initrd_start - *ppos;
%A         if (count > left) count = left;
%A         if (count == 0) return 0;
%A         copy_to_user(buf, (char *)initrd_start + *ppos, count);
%A         *ppos += count;
%A         return count;
%A }
%A static int initrd_release(struct inode *inode,struct file *file)
%A {
%A         extern void free_initrd_mem(unsigned long, unsigned long);
%A
%A         lock_kernel();
%A         if (!--initrd_users) {
%A                 blkdev_put(inode->i_bdev, BDEV_FILE);
%A                 iput(inode);
%A                 free_initrd_mem(initrd_start, initrd_end);
%A                 initrd_start = 0;
%A         }
%A         unlock_kernel();
%A         return 0;
%A }
%A
块设备文件IO函数
================
; fs/block_dev.c
%A
%A struct block_device {
%A         struct list_head        bd_hash;
%A         atomic_t                bd_count;
%A /*        struct address_space        bd_data; */
%A         dev_t                        bd_dev;  /* not a kdev_t - it‘s a search key */
%A         atomic_t                bd_openers;
%A         const struct block_device_operations *bd_op;
%A         struct semaphore        bd_sem;        /* open/close mutex */
%A };
%A
%A struct file_operations def_blk_fops = {
%A         open:                blkdev_open,
%A         release:        blkdev_close,
%A         llseek:                block_llseek,
%A         read:                block_read,
%A         write:                block_write,
%A         fsync:                block_fsync,
%A         ioctl:                blkdev_ioctl,
%A };
%A
%A int blkdev_open(struct inode * inode, struct file * filp)
%A {
%A         int ret = -ENXIO;
%A         struct block_device *bdev = inode->i_bdev;
%A         down(&bdev->bd_sem);
%A         lock_kernel();
%A         if (!bdev->bd_op)
%A                 bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
%A                 ; 在设备盘打开的时候安装其块设备函数表
%A         if (bdev->bd_op) {
%A                 ret = 0;
%A                 if (bdev->bd_op->open)
%A                         ret = bdev->bd_op->open(inode,filp); 向驱动程序发送"open"消息
%A                 if (!ret)
%A                         atomic_inc(&bdev->bd_openers);
%A                 else if (!atomic_read(&bdev->bd_openers))
%A                         bdev->bd_op = NULL;
%A         }        
%A         unlock_kernel();
%A         up(&bdev->bd_sem);
%A         return ret;
%A }
%A static int blkdev_close(struct inode * inode, struct file * filp)
%A {
%A         return blkdev_put(inode->i_bdev, BDEV_FILE);
%A }
%A static loff_t block_llseek(struct file *file, loff_t offset, int origin)
%A {
%A         long long retval;
%A         kdev_t dev;
%A
%A         switch (origin) {
%A                 case 2: 尾部定位
%A                         dev = file->f_dentry->d_inode->i_rdev;
%A                         if (blk_size[MAJOR(dev)])
%A                                 offset += (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
%A                         /* else?  return -EINVAL? */
%A                         break;
%A                 case 1: 相对定位
%A                         offset += file->f_pos;
%A         }
%A         retval = -EINVAL;
%A         if (offset >= 0) {
%A                 if (offset != file->f_pos) {
%A                         file->f_pos = offset;
%A                         file->f_reada = 0;
%A                         file->f_version = ++event;
%A                 }
%A                 retval = offset;
%A         }
%A         return retval;
%A }
%A ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
%A {
%A         struct inode * inode = filp->f_dentry->d_inode;
%A         size_t block;
%A         loff_t offset;
%A         ssize_t blocksize;
%A         ssize_t blocksize_bits, i;
%A         size_t blocks, rblocks, left;
%A         int bhrequest, uptodate;
%A         struct buffer_head ** bhb, ** bhe;
%A         struct buffer_head * buflist[NBUF];
%A         struct buffer_head * bhreq[NBUF];
%A         unsigned int chars;
%A         loff_t size;
%A         kdev_t dev;
%A         ssize_t read;
%A
%A         dev = inode->i_rdev;
%A         blocksize = BLOCK_SIZE; 块设备的标准尺寸是1024字节
%A         if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
%A                 blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
%A         i = blocksize;
%A         blocksize_bits = 0;
%A         while (i != 1) {
%A                 blocksize_bits++;
%A                 i >>= 1;
%A         }
%A
%A         offset = *ppos;
%A         if (blk_size[MAJOR(dev)])
%A                 size = (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;取设备盘容量
%A         else
%A                 size = (loff_t) INT_MAX << BLOCK_SIZE_BITS;loff_t为64位整数,取设备盘的极限容量
%A
%A         if (offset > size)
%A                 left = 0; left为剩余所要读的字节,offset为块设备文件指针
%A         /* size - offset might not fit into left, so check explicitly. */
%A         else if (size - offset > INT_MAX)
%A                 left = INT_MAX;
%A         else
%A                 left = size - offset;
%A         if (left > count)
%A                 left = count;
%A         if (left <= 0)
%A                 return 0;
%A         read = 0;
%A         block = offset >> blocksize_bits; 求所要读的起始块号
%A         offset &= blocksize-1; 求所要读的在块中的起点
%A         size >>= blocksize_bits;
%A         rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits;求所要读的块
%A         bhb = bhe = buflist; buflist作为环形队列,bhb是对设备的读指针,bhe是对用户的写指针
%A         if (filp->f_reada) {
%A                 if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9))
%A                         blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9);
%A                 if (rblocks > blocks)
%A                         blocks = rblocks;
%A                 
%A         }
%A         if (block + blocks > size) { 如果预读越过设备边界
%A                 blocks = size - block;
%A                 if (blocks == 0)
%A                         return 0;
%A         }
%A
%A         /* We do this in a two stage process.  We first try to request
%A            as many blocks as we can, then we wait for the first one to
%A            complete, and then we try to wrap up as many as are actually
%A            done.  This routine is rather generic, in that it can be used
%A            in a filesystem by substituting the appropriate function in
%A            for getblk.
%A
%A            This routine is optimized to make maximum use of the various
%A            buffers and caches. */
%A
%A         do {
%A                 bhrequest = 0;
%A                 uptodate = 1;
%A                 while (blocks) {
%A                         --blocks;
%A                         *bhb = getblk(dev, block++, blocksize); 取缓冲页
%A                         if (*bhb && !buffer_uptodate(*bhb)) { 该块不在缓冲中
%A                                 uptodate = 0;
%A                                 bhreq[bhrequest++] = *bhb; 请求块排队
%A                         }
%A
%A                         if (++bhb == &buflist[NBUF])
%A                                 bhb = buflist; bhb是buflist上的循环指针,指向下一要读的块
%A
%A                         /* If the block we have on hand is uptodate, go ahead
%A                            and complete processing. */
%A                         if (uptodate) 如果当前块在缓冲中
%A                                 break;
%A                         if (bhb == bhe) 如果请求块队列已满
%A                                 break;
%A                 }
%A
%A                 /* Now request them all */
%A                 if (bhrequest) {
%A                         ll_rw_block(READ, bhrequest, bhreq); 读请求块
%A                 }
%A
%A                 do { /* Finish off all I/O that has actually completed */
%A                         if (*bhe) {
%A                                 wait_on_buffer(*bhe); 等待块解锁,即读完成,对缓冲块无效
%A                                 if (!buffer_uptodate(*bhe)) {        /* read error? */
%A                                         brelse(*bhe);
%A                                         if (++bhe == &buflist[NBUF])
%A                                           bhe = buflist;
%A                                         left = 0;
%A                                         break;
%A                                 }
%A                         }                        
%A                         if (left < blocksize - offset)
%A                                 chars = left; 如果读终止于块内,则向用户拷贝剩余字节
%A                         else
%A                                 chars = blocksize - offset; 拷贝到块尾
%A                         *ppos += chars;
%A                         left -= chars;
%A                         read += chars; 已读的字节数
%A                         if (*bhe) {
%A                                 copy_to_user(buf,offset+(*bhe)->b_data,chars);
%A                                 brelse(*bhe);
%A                                 buf += chars; 更新用户指针
%A                         } else {
%A                                 while (chars-- > 0)
%A                                         put_user(0,buf++); 向用户传送0字节
%A                         }
%A                         offset = 0; 开始从块边界拷贝
%A                         if (++bhe == &buflist[NBUF])
%A                                 bhe = buflist; bhe是buflist上的循环指针,指向下一要向用户写的块
%A                 } while (left > 0 && bhe != bhb && (!*bhe ||!buffer_locked(*bhe)));
%A                 if (bhe == bhb && !blocks)
%A                         break;
%A         } while (left > 0);
%A
%A /* Release the read-ahead blocks */
%A         while (bhe != bhb) { 剩余未向用户写的块属于预读块
%A                 brelse(*bhe);
%A                 if (++bhe == &buflist[NBUF])
%A                         bhe = buflist;
%A         };
%A         if (!read)
%A                 return -EIO;
%A         filp->f_reada = 1;
%A         return read;
%A }
%A ssize_t block_write(struct file * filp, const char * buf,
%A                     size_t count, loff_t *ppos)
%A {
%A         struct inode * inode = filp->f_dentry->d_inode;
%A         ssize_t blocksize, blocksize_bits, i, buffercount, write_error;
%A         ssize_t block, blocks;
%A         loff_t offset;
%A         ssize_t chars;
%A         ssize_t written;
%A         struct buffer_head * bhlist[NBUF];
%A         size_t size;
%A         kdev_t dev = inode->i_rdev;
%A         struct buffer_head * bh, *bufferlist[NBUF];
%A         register char * p;
%A
%A         if (is_read_only(dev))
%A                 return -EPERM;
%A
%A         written = write_error = buffercount = 0;
%A         blocksize = BLOCK_SIZE;
%A         if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
%A                 blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
%A
%A         i = blocksize;
%A         blocksize_bits = 0;
%A         while(i != 1) {
%A                 blocksize_bits++;
%A                 i >>= 1;
%A         }
%A
%A         block = *ppos >> blocksize_bits;
%A         offset = *ppos & (blocksize-1);
%A
%A         if (blk_size[MAJOR(dev)])
%A                 size = ((loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS) >>blocksize_bits;
%A         else
%A                 size = INT_MAX;
%A         while (count>0) {
%A                 if (block >= size)
%A                         return written ? written : -ENOSPC;
%A                 chars = blocksize - offset;
%A                 if (chars > count)
%A                         chars=count;
%A
%A #if 0
%A                 /* get the buffer head */
%A                 {
%A                         struct buffer_head * (*fn)(kdev_t, int, int) = getblk;
%A                         if (chars != blocksize)
%A                                 fn = bread;
%A                         bh = fn(dev, block, blocksize);
%A                         if (!bh)
%A                                 return written ? written : -EIO;
%A                         if (!buffer_uptodate(bh))
%A                                 wait_on_buffer(bh);
%A                 }
%A #else
%A                 bh = getblk(dev, block, blocksize);
%A                 if (!bh)
%A                         return written ? written : -EIO;
%A
%A                 if (!buffer_uptodate(bh))
%A                 { 如果要写的块不在块缓冲之中或者要读的块尚未读取
%A                   if (chars == blocksize) 写一整块
%A                     wait_on_buffer(bh); 对新缓冲块来说是空操作
%A                   else
%A                   { 要写的位置起始于块的中间
%A                     bhlist[0] = bh;
%A                     if (!filp->f_reada || !read_ahead[MAJOR(dev)]) {
%A                       /* We do this to force the read of a single buffer */
%A                       blocks = 1; 当前位置上读取一块
%A                     } else { 从当前位置读取多个块
%A                       /* Read-ahead before write */
%A                       blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2;
%A                       if (block + blocks > size) blocks = size - block;
%A                       if (blocks > NBUF) blocks=NBUF;
%A                       if (!blocks) blocks = 1;
%A                       for(i=1; i<blocks; i++)
%A                       {
%A                         bhlist[ i ] = getblk (dev, block+i, blocksize);
%A                         if (!bhlist[ i ])
%A                         {
%A                           while(i >= 0) brelse(bhlist[i--]);
%A                           return written ? written : -EIO;
%A                         }
%A                       }
%A                     }
%A                     ll_rw_block(READ, blocks, bhlist); 提交写任务
%A                     for(i=1; i<blocks; i++) brelse(bhlist[ i ] );
%A                     wait_on_buffer(bh); 等待读写完成
%A                     if (!buffer_uptodate(bh)) {
%A                           brelse(bh); 如果有一个块读写错误,则返回错误
%A                           return written ? written : -EIO;
%A                     }
%A                   };
%A                 };
%A #endif
%A                 block++;
%A                 p = offset + bh->b_data;
%A                 offset = 0;
%A                 *ppos += chars;
%A                 written += chars;
%A                 count -= chars;
%A                 copy_from_user(p,buf,chars); 从用户拷贝数据到缓冲块
%A                 p += chars;
%A                 buf += chars;
%A                 mark_buffer_uptodate(bh, 1);
%A                 mark_buffer_dirty(bh);
%A                 if (filp->f_flags & O_SYNC)
%A                         bufferlist[buffercount++] = bh; 将产生的脏块排队
%A                 else
%A                         brelse(bh);
%A                 if (buffercount == NBUF){ 当脏块数超过缓冲区数
%A                         ll_rw_block(WRITE, buffercount, bufferlist); 写脏块
%A                         for(i=0; i<buffercount; i++){
%A                                 wait_on_buffer(bufferlist[ i ]); 等待完成
%A                                 if (!buffer_uptodate(bufferlist[ i ]))
%A
%A                                         write_error=1;
%A                                 brelse(bufferlist[ i ]);
%A                         }
%A                         buffercount=0;
%A                 }
%A                 balance_dirty(dev);
%A                 if (write_error)
%A                         break;
%A         }
%A         if ( buffercount ){ 写剩余的脏块
%A                 ll_rw_block(WRITE, buffercount, bufferlist);
%A                 for(i=0; i<buffercount; i++){
%A                         wait_on_buffer(bufferlist[ i ]);
%A                         if (!buffer_uptodate(bufferlist[ i ]))
%A                                 write_error=1;
%A                         brelse(bufferlist[ i ]);
%A                 }
%A         }                
%A         filp->f_reada = 1;
%A         if(write_error)
%A                 return -EIO;
%A         return written;
%A }
%A static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
%A {
%A         return fsync_dev(dentry->d_inode->i_rdev); 刷新与块设备相关联的脏块
%A }
%A static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
%A                         unsigned long arg)
%A {
%A         if (inode->i_bdev->bd_op->ioctl)
%A                 return inode->i_bdev->bd_op->ioctl(inode, file, cmd, arg);
%A         return -EINVAL;
%A }
%A const struct block_device_operations * get_blkfops(unsigned int major)
%A {
%A         const struct block_device_operations *ret = NULL;
%A
%A         /* major 0 is used for non-device mounts */
%A         if (major && major < MAX_BLKDEV) {
%A #ifdef CONFIG_KMOD
%A                 if (!blkdevs[major].bdops) {
%A                         char name[20];
%A                         sprintf(name, "block-major-%d", major);
%A                         request_module(name);
%A                 }
%A #endif
%A                 ret = blkdevs[major].bdops;
%A         }
%A         return ret;
%A }
%A int blkdev_put(struct block_device *bdev, int kind)
%A {
%A         int ret = 0;
%A         kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
%A         down(&bdev->bd_sem);
%A         /* syncing will go here */
%A         lock_kernel();
%A         if (kind == BDEV_FILE || kind == BDEV_FS)
%A                 fsync_dev(rdev);
%A         if (atomic_dec_and_test(&bdev->bd_openers)) {
%A                 /* invalidating buffers will go here */
%A                 invalidate_buffers(rdev);
%A         }
%A         if (bdev->bd_op->release) {
%A                 struct inode * fake_inode = get_empty_inode();
%A                 ret = -ENOMEM;
%A                 if (fake_inode) {
%A                         fake_inode->i_rdev = rdev;
%A                         ret = bdev->bd_op->release(fake_inode, NULL);
%A                         iput(fake_inode);
%A                 }
%A         }
%A         if (!atomic_read(&bdev->bd_openers))
%A                 bdev->bd_op = NULL;        /* we can‘t rely on driver being */
%A                                         /* kind to stay around. */
%A         unlock_kernel();
%A         up(&bdev->bd_sem);
%A         return ret;
%A }
块设备文件inode的初始化
=======================
; fs/devices.c
%A void init_special_inode(struct inode *inode, umode_t mode, int rdev)
%A {
%A         inode->i_mode = mode;
%A         if (S_ISCHR(mode)) {
%A                 inode->i_fop = &def_chr_fops;
%A                 inode->i_rdev = to_kdev_t(rdev);
%A         } else if (S_ISBLK(mode)) {
%A                 inode->i_fop = &def_blk_fops;
%A                 inode->i_rdev = to_kdev_t(rdev);
%A                 inode->i_bdev = bdget(rdev);
%A         } else if (S_ISFIFO(mode))
%A                 inode->i_fop = &def_fifo_fops;
%A         else if (S_ISSOCK(mode))
%A                 inode->i_fop = &bad_sock_fops;
%A         else
%A                 printk(KERN_DEBUG "init_special_inode: bogus imode (%o)\n", mode);
%A }
%A struct block_device *bdget(dev_t dev)
%A {
%A         struct list_head * head = bdev_hashtable + hash(dev);
%A         struct block_device *bdev, *new_bdev;
%A         spin_lock(&bdev_lock);
%A         bdev = bdfind(dev, head);
%A         spin_unlock(&bdev_lock);
%A         if (bdev)
%A                 return bdev;
%A         new_bdev = alloc_bdev(); 使用kmem_cache_alloc()分配内存
%A         if (!new_bdev)
%A                 return NULL;
%A         atomic_set(&new_bdev->bd_count,1);
%A         new_bdev->bd_dev = dev;
%A         new_bdev->bd_op = NULL;
%A         spin_lock(&bdev_lock);
%A         bdev = bdfind(dev, head);
%A         if (!bdev) {
%A                 list_add(&new_bdev->bd_hash, head);
%A                 spin_unlock(&bdev_lock);
%A                 return new_bdev;
%A         }
%A         spin_unlock(&bdev_lock);
%A         destroy_bdev(new_bdev); 使用kmem_cache_free()释放内存
%A         return bdev;
%A }
%A
%A%A
%A

*博客内容为网友个人发布,仅代表博主个人观点,如有侵权请联系工作人员删除。

参与讨论
登录后参与讨论
推荐文章
最近访客