Wednesday, December 18, 2013

Write to block device from kernel...


/*
 * disk_open - resolve a block device name to its gendisk and block_device.
 *
 * @devinst: per-device state; on success ->disk and ->bdev are filled in.
 * @path:    device name handed to blk_lookup_devt() (partition 0 is looked up).
 *
 * Returns 0 on success and -1 on any failure.  On failure after the gendisk
 * was obtained, the gendisk reference is dropped again and ->disk is reset
 * to NULL so the caller is not left holding a half-initialised instance.
 *
 * NOTE(review): the references kept on success (gendisk and block_device)
 * are not released anywhere in the visible code; disk_close() is empty.
 */
int
disk_open(dev_minor_t *devinst, const char *path)
{
        int     partnum;
        dev_t   devt;
        struct  hd_struct *part;
        struct  module *owner;

        if ((devt = blk_lookup_devt(path, 0)) == 0) {
                printk(KERN_INFO "blk_lookup_devt %u\n", devt);
                return (-1);
        }
        printk(KERN_INFO "Success blk_lookup_devt %u\n", devt);
        if ((devinst->disk = get_gendisk(devt, &partnum)) == NULL) {
                printk(KERN_INFO "Failed to get generic device ..\n");
                return (-1);
        }

        /*
         * The partition is only looked up as an existence check, so the
         * reference taken by disk_get_part() is dropped straight away.
         */
        if ((part = disk_get_part(devinst->disk, 0)) == NULL) {
                printk(KERN_INFO "failed disk_get_part\n");
                goto err_put_disk;
        }
        disk_put_part(part);

        if ((devinst->bdev = bdget_disk(devinst->disk, 0)) == NULL) {
                printk(KERN_INFO "failed bdget_disk\n");
                goto err_put_disk;
        }
        return (0);

err_put_disk:
        /*
         * Undo get_gendisk().  On kernels of this vintage (bio->bi_sector
         * era) get_gendisk() also pins the driver module, hence the
         * module_put() -- TODO confirm against the target kernel version.
         */
        owner = devinst->disk->fops->owner;
        put_disk(devinst->disk);
        module_put(owner);
        devinst->disk = NULL;
        return (-1);
}


/*
 * disk_close - placeholder for tearing down a device opened via disk_open().
 *
 * @filp: unused.
 *
 * The function currently does nothing.
 * NOTE(review): it receives a struct file, not the dev_minor_t that
 * disk_open() fills in, so it has no handle on the disk/bdev to release.
 */
void
disk_close(struct file *filp)
{
        return;
}


/*
 * disk_read - synchronously read one 4096-byte block from the device.
 *
 * @devinst: device state filled in by disk_open() (->disk and ->bdev used).
 * @offset:  currently unused; the read is always issued at sector 0.
 * @data:    destination buffer, or NULL to read into a temporary page that
 *           is freed before returning.  The buffer is zeroed and mapped for
 *           4096 bytes regardless of @size, so it must be at least that big.
 * @size:    only logged.
 *
 * Returns 0 on success, -1 on failure.
 *
 * NOTE(review): the hard-coded sector (the "offset >> 9" variant was
 * commented out by the author) and the fixed 4096-byte length look like
 * experiment leftovers -- confirm before relying on @offset/@size.
 */
int
disk_read(dev_minor_t *devinst, unsigned long long offset,
                                       unsigned char *data, unsigned int size)
{
        struct  page *page = NULL;
        unsigned char *d;
        DECLARE_COMPLETION_ONSTACK(waithdl);
        struct  block_device *bdev;
        struct  bio *bb;
        int     ret = 0;

        printk(KERN_INFO "Enter %s\n", __FUNCTION__);
        if (data == NULL) {
                page = alloc_page(GFP_KERNEL);
                if (page == NULL) {
                        printk(KERN_ALERT "Failed to allocate read page\n");
                        return (-1);
                }
                d = page_address(page);
        } else {
                d = data;
        }
        memset(d, 0x00, 4096);

        bb = bio_map_kern(devinst->disk->queue, d, 4096, GFP_KERNEL);
        if (IS_ERR(bb)) {
                printk(KERN_ALERT "FAiled to map kernel memory\n");
                ret = -1;
                goto out_free_page;
        }
        //bb->bi_sector = offset >> 9;
        bb->bi_sector = 0;
        printk(KERN_INFO "offset %llu size %u\n",
               (unsigned long long)bb->bi_sector, size);

        /*
         * Use the whole-disk device if the bdev has already been opened,
         * otherwise open it now.  blkdev_get_by_dev() reports failure as an
         * ERR_PTR, not NULL, so both must be checked.
         */
        bdev = devinst->bdev->bd_contains;
        if (bdev == NULL) {
                bdev = blkdev_get_by_dev(devinst->bdev->bd_dev, FMODE_READ|FMODE_WRITE, NULL);
        }
        if (IS_ERR_OR_NULL(bdev)) {
                printk(KERN_ALERT "Doesn't have blk device ...\n");
                bio_put(bb);    /* never submitted: drop the mapping */
                ret = -1;
                goto out_free_page;
        }
        bb->bi_bdev = bdev;
        bb->bi_end_io = callback_fn;
        bb->bi_private = &waithdl;
        /* Extra reference so the flags can be inspected after completion. */
        bio_get(bb);

        submit_bio(READ_SYNC, bb); //read operation
        wait_for_completion(&waithdl);

        if (!bio_flagged(bb, BIO_UPTODATE)) {
                printk(KERN_ALERT "Failed ...\n");
                ret = -1;
        }
        bio_put(bb);
        printk(KERN_INFO "Data read %x%x%x%x\n", d[0],d[1],d[2],d[3]);

out_free_page:
        if (page != NULL)
                __free_page(page);
        return (ret);
}

/*
 * disk_write - synchronously write one 4096-byte block to the device.
 *
 * @devinst: device state filled in by disk_open() (->disk and ->bdev used).
 * @offset:  currently unused; the write is always issued at sector 1.
 * @data:    source buffer; 4096 bytes are mapped regardless of @size.
 * @size:    currently unused.
 *
 * Returns 0 on success, -1 on failure (including a NULL @data).
 *
 * NOTE(review): the hard-coded sector and length look like experiment
 * leftovers -- confirm before relying on @offset/@size.
 */
int
disk_write(dev_minor_t *devinst, unsigned long long offset,
                                     unsigned char *data, unsigned int size)
{
        DECLARE_COMPLETION_ONSTACK(waithdl);
        struct  block_device *bdev;
        struct  bio *bb;
        int     ret = 0;

        printk(KERN_INFO "Enter %s\n", __FUNCTION__);

        if (data == NULL) {
                printk(KERN_ALERT "No data to write\n");
                return (-1);
        }

        bb = bio_map_kern(devinst->disk->queue, data, 4096, GFP_KERNEL);
        if (IS_ERR(bb)) {
                printk(KERN_ALERT "FAiled to map kernel memory\n");
                return (-1);
        }
        bb->bi_sector = 1;

        /*
         * Use the whole-disk device if the bdev has already been opened,
         * otherwise open it now.  blkdev_get_by_dev() reports failure as an
         * ERR_PTR, not NULL, so both must be checked.
         */
        bdev = devinst->bdev->bd_contains;
        if (bdev == NULL) {
                bdev = blkdev_get_by_dev(devinst->bdev->bd_dev, FMODE_READ|FMODE_WRITE, NULL);
        }
        if (IS_ERR_OR_NULL(bdev)) {
                printk(KERN_ALERT "Doesn't have blk device ...\n");
                bio_put(bb);    /* never submitted: drop the mapping */
                return (-1);
        }
        bb->bi_bdev = bdev;
        bb->bi_end_io = callback_fn;
        bb->bi_private = &waithdl;

        /* Extra reference so the flags can be inspected after completion. */
        bio_get(bb);
        submit_bio(WRITE_SYNC, bb);

        wait_for_completion(&waithdl);

        if (!test_bit(BIO_UPTODATE, &bb->bi_flags)) {
                printk(KERN_ALERT "Failed ...\n");
                ret = -1;
        }
        /*
         * Drop exactly the reference taken by bio_get() above, on both the
         * success and the failure path (matching disk_read()).
         * NOTE(review): this assumes callback_fn releases the bio's
         * original reference -- confirm against its definition.
         */
        bio_put(bb);
        return (ret);
}

/* backup code */
/*
 * page_read - scratch fragment: read into a page with a hand-built bio.
 *
 * This is not a complete function: bio, bio_vec, p, size, bdev, sect,
 * complete, d and rq_complete are not declared in this fragment and must
 * come from surrounding code that is not shown here.
 */
page_read() {

        /* Describe a single segment: `size` bytes at offset 0 of page `p`. */
        bio_init(&bio);
        bio.bi_io_vec = &bio_vec;
        bio_vec.bv_page = p;
        bio_vec.bv_len = size;
        bio_vec.bv_offset = 0;
        bio.bi_vcnt = 1;
        bio.bi_idx = 0;
        bio.bi_size = size;
        /* Target the containing device at sector `sect`. */
        bio.bi_bdev = bdev->bd_contains;
        bio.bi_sector = sect;
        /* rq_complete is the end_io handler; presumably it signals `complete`. */
        init_completion(&complete);
        bio.bi_private = &complete;
        bio.bi_end_io = rq_complete;

        //submit_bio(WRITE_SYNC, &bio);
        /* Issue the read and block until the completion is signalled. */
        submit_bio(READ_SYNC, &bio);
        wait_for_completion(&complete);

        /* BIO_UPTODATE set is treated as success; dump the first four bytes of d. */
        if (test_bit(BIO_UPTODATE, &bio.bi_flags)) {
               printk(KERN_INFO "Read BIO OK 0x%x%x%x%x\n", d[0],d[1],d[2],d[3]);
        } else {
                printk(KERN_INFO "BIO ERROR\n");
        }
}







Monday, December 9, 2013

How to write to a file from a kernel module

/* The code below can be used to create a file and write data to it using VFS calls.
*/

#include <linux/buffer_head.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <asm/segment.h>
#include <asm/uaccess.h>

/*
 * driver_file_open - open (creating if necessary) a file from kernel context.
 *
 * @path:  path of the file to open.
 * @flags: ignored; the file is always opened with O_CREAT|O_RDWR.
 * @mode:  ignored; the creation mode is always S_IRWXU|S_IRWXG|S_IRWXO.
 *
 * Returns whatever filp_open() returns, unchecked; callers must validate
 * the result before using it.
 */
struct file *
driver_file_open(const char *path, int flags, int mode)
{
        const mm_segment_t saved_fs = get_fs();
        struct file *fp;

        /* Switch the address limit around the open, then restore it. */
        set_fs(get_ds());
        fp = filp_open(path, O_CREAT|O_RDWR, S_IRWXU|S_IRWXG|S_IRWXO);
        set_fs(saved_fs);

        return fp;
}



/*
 * driver_file_close - close a file obtained from driver_file_open().
 *
 * @filp: file to close.  NULL and error pointers are ignored, because
 *        driver_file_open() hands back filp_open()'s result unchecked and
 *        a failed open must not be passed on to filp_close().
 */
void
driver_file_close(struct file *filp)
{
        if (IS_ERR_OR_NULL(filp))
                return;

        filp_close(filp, NULL);
}



int
driver_file_write(struct file *file, unsigned long long offset, unsigned char *data, unsigned int size)
{
        int     ret;
        mm_segment_t    oldfs;
        loff_t  pos = offset;
        oldfs   = get_fs();
        set_fs(get_ds());

        //vfs_setpos(file, pos, pos + PAGE_SIZE);
        //Workaround for vfs_setpos, not implemented on my version of linux.
        spin_lock(&file->f_lock);
        file->f_pos = pos;
        //file->f_version = 0;
        printk(KERN_INFO "set position to  %llx\n", pos);
        spin_unlock(&file->f_lock);


        ret = vfs_write(file, data, size, &pos);
        //vfs_fsync(file, 0);
        set_fs(oldfs);
        return (ret);
}




int
driver_file_read(struct file *file, unsigned long long offset, unsigned char *data, unsigned int size)
{
        int     ret;
        mm_segment_t    oldfs;
        loff_t  pos = offset;
        oldfs   = get_fs();
        set_fs(get_ds());

        //vfs_setpos(file, pos, pos + PAGE_SIZE);
        //Workaround for vfs_setpos, not implemented on my version of linux.
        spin_lock(&file->f_lock);
        file->f_pos = pos;
        //file->f_version = 0;
        printk(KERN_INFO "set position to read %llx\n", pos);
        spin_unlock(&file->f_lock);


        ret = vfs_read(file, data, size, &pos);
        //vfs_fsync(file, 0);
        set_fs(oldfs);
        return (ret);
}

Thursday, May 30, 2013

Internals of Executing a user program.

My first C program was "hello world", written in a Unix environment. In this post I will try to cover what happens when that binary executable is run from a shell prompt.

This post mostly looks into the code path taken when a program is executed by a user.

As we all know, every binary executable is executed from the shell (directly or indirectly). There is a reason for this: the shell is a command-line interpreter that provides a user interface to the OS.

To execute a program, some preparation is needed, and most of it is done by the kernel. To interact with the kernel, the user needs to invoke a specific system call.

When we run a simple program like hello_world at the shell prompt, the command-line interpreter invokes execve() //system call SYS_execve.

The argc, argv and envp values are all copied from user space to kernel space.


sys_execve()                                                                     arch/x86/kernel/process.c
{
      do_execve()
}

do_execve()                                                                       fs/exec.c
{
      do_execve_common()
}

do_execve_common()                                                        fs/exec.c
{
       prepare struct linux_binprm                                        include/linux/binfmts.h
       it holds all the information required when loading binary file.
       search_binary_handler();

}

search_binary_handler()                                                       fs/exec.c


{
          Look up the load_binary function callback based on the executable format (ELF, COFF)
          Iterate and match against linux_binprm thru' all supported format.
          Invoke fmt->load_binary callback, since we know the format type is ELF.
}

load_elf_binary()                                                                  fs/binfmt_elf.c
{
           // More information needs to be added w.r.t stack initialization/memory for bss
          // and elf format.
           start_thread() sets the new instruction pointer to the entry point; for a dynamically linked binary that is the dynamic linker /lib/ld-linux.so (_dl_start())
}


Wednesday, May 22, 2013

How to Reserve RAM

Determine total memory available.

cat /proc/meminfo dumps the memory information

krmohan@krmohan:~$ cat /proc/meminfo
MemTotal:        4040084 kB  (~ 3.8 G)
MemFree:         3125056 kB  (~ 2.9 G) free

Locate grub file on your system.
1) Ubuntu
Add or append an entry to /etc/default/grub
GRUB_CMDLINE_LINUX="mem=2000m"

Rebuild grub.cfg with your changes
krmohan@krmohan:~$ sudo update-grub


or

Add memmap=64M$1024M to the kernel command line in the grub file to reserve 64MB at offset 1G


Reboot your system

 krmohan@krmohan:~$ cat /proc/meminfo
MemTotal:        2001116 kB  (~1.8 G)  2 G gone after your changes
MemFree:         1318796 kB

Driver Changes
============
In your init or probe function

{
      /*
       * Tell the kernel you have reserved this resource, thus preventing
       * other drivers from doing the same.  A NULL return means someone
       * else already owns the range.
       */
       if (request_mem_region(2000*1024*1024, size, "who r u") == NULL)
               return -EBUSY;
       /* get the virtual address of the physical address */
       virt_addr = ioremap_nocache(2000*1024*1024, size);
       if (virt_addr == NULL) {
               /* mapping failed: give the region back before bailing out */
               release_mem_region(2000*1024*1024, size);
               return -ENOMEM;
       }
}

Make sure you release the resource in your _exit or release function
{
      /*
       * Tear down in the reverse order of setup: unmap the I/O memory
       * first, while the region is still owned by this driver ...
       */
       iounmap(virt_addr);
       /* ... then tell the kernel that you are done with the resource */
       release_mem_region(2000*1024*1024, size);
}

/*
 * read_entry_point - fragment of the driver's read handler: logs one 32-bit
 * word read through the ioremap'ed mapping at virt_addr + i.
 * `i` and `virt_addr` are not declared in this fragment.
 */
read_entry_point()
{
           printk(KERN_INFO "ioread32 %x",  ioread32(virt_addr + i));
}

In Userspace
=========
#define RAM_START       (2000 * 1024 * 1024)
#define MAP_SIZE        (unsigned)48*1024*1024
#define MAP_MASK        (MAP_SIZE - 1)

volatile        char *hw = (volatile char *)RAM_START;
memfd = open("/dev/mem", O_RDWR|O_SYNC);
membase = mmap(0,  size, ROT_READ|PROT_WRITE, MAP_SHARED, memfd, (unsigned) hw & ~MAP_MASK);
(unsigned int *)membase[i] = rand();

fd = open("my_driver", O_RDONLY);
read(fd, x, 1);   // verify with kernel message, what u wrote was seen by driver.
close(fd)

In kernel
======
 #define OFFSET 0x40000000   //1G



mmap()
{
       unsigned long vsize = vma->vm_end - vma->vm_start;
     /* mmap physical memory @ 1G */
        if (remap_pfn_range(vma, vma->vm_start,
                        //virt_to_phys(virt_addr) >> PAGE_SHIFT,
                        OFFSET >> PAGE_SHIFT,
                vsize, vma->vm_page_prot)) {
                printk(KERN_INFO "Failed ...\n");
                return -EAGAIN;
        }
        printk(KERN_INFO "Size mmap is %ld\n", vsize);
        return (0);
}