/*
 * Cluster Information Service - a monitoring system for Linux clusters
 * Copyright (C) 2000 Institute of Informatics, Slovak Academy of Sciences.
 * Written by Jan Astalos (astalos.ui@savba.sk)
 * 
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as published
 * by the Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 * 
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston MA 02111-1307, USA.
 *
 * Kernel module for monitoring of processes with fork/exit notification via
 * netlink device.
 */

#include <linux/config.h>
#include <linux/module.h>

#ifdef MODVERSIONS
#include <linux/modversions.h>
#endif

#include <sys/syscall.h>
#include <asm/uaccess.h>
#include <net/sock.h>
#include <linux/smp_lock.h>
#include <linux/proc_fs.h>
#include <linux/netlink.h>
#include <linux/init.h>

#include "cis.h"
#include "cis_mon.h"

/*
 * Some useful macros from bits/waitstatus.h.
 * We need to test the status of an exited child process in the wait
 * system call wrappers.
 *
 *   __WTERMSIG(s)    - low 7 bits of the status word
 *   __WIFSTOPPED(s)  - low byte equals 0x7f
 *   __WIFEXITED(s)   - low 7 bits are zero
 *   __WIFSIGNALED(s) - neither stopped nor exited
 */
#define __WTERMSIG(status)     ((status) & 0x7f)
#define __WIFSTOPPED(status)   (((status) & 0xff) == 0x7f)
#define __WIFEXITED(status)    (__WTERMSIG(status) == 0)
#define __WIFSIGNALED(status)  (!__WIFSTOPPED(status) && !__WIFEXITED(status))

/*
 * Scale `a` by 100 / FIXED_1 with rounding to nearest: FIXED_1/200 (half of
 * the divisor) is added before dividing by FIXED_1/100.
 * FIXED_1 is not defined in this file (presumably the kernel's fixed-point
 * "1.0" used for load averages -- verify against the included headers).
 * The argument is parenthesized so that expressions with low-precedence
 * operators (e.g. `x | y`, `c ? x : y`) are evaluated as a unit.
 */
#define LOAD_CONV(a) (((a) + (FIXED_1 / 200)) / (FIXED_1 / 100))

/* Kernel-side netlink socket: created in procmon_init(), used by
 * procmon_msg() to broadcast events, released in cleanup_module(). */
static struct sock *pmsk;
/* System call dispatch table defined elsewhere; this module indexes it
 * with the SYS_* constants to install and remove its wrappers. */
extern void *sys_call_table[];
//extern int nr_free_pages;

/*
 * Old handlers: the sys_call_table entries that were in place before
 * procmon_init() installed the new_* wrappers.  The wrappers chain to them
 * and cleanup_module() writes them back.  old_execve is only saved and
 * restored; new_execve() does not call it.
 */
int (*old_exit)   (int code);
int (*old_fork)   (struct pt_regs regs);
int (*old_execve) (struct pt_regs regs);
int (*old_clone)  (struct pt_regs regs);
int (*old_vfork)  (struct pt_regs regs);
int (*old_waitpid)(pid_t pid,unsigned int * stat_addr, int options);
int (*old_wait4)  (pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);

/*
 * Read function of the "cis_proclist" proc entry.
 *
 * Writes one binary struct cis_procinfo record per task into `buffer`,
 * skipping records that end before `offset`, and stops once at least
 * `length` bytes have been produced.
 *
 *   buffer - output buffer supplied by the proc layer
 *   start  - out: set to the position inside `buffer` that corresponds
 *            to file offset `offset`
 *   offset - file offset requested by the reader
 *   length - number of bytes requested
 *   dummy  - unused
 *
 * Returns the number of valid bytes available at *start (0 at end of file).
 *
 * NOTE(review): the task list is walked without taking any lock; confirm
 * whether tasklist_lock is available to modules on the target kernel and
 * take it for reading if so.
 */
int read_proclist(char *buffer, char **start, off_t offset, int length, int dummy)
{
	struct task_struct *tsk;
	struct cis_procinfo p;
	int len = 0;
	off_t pos = 0;
	off_t begin;

	for_each_task (tsk) {

		pos += sizeof(struct cis_procinfo);
		if (pos < offset)
			continue;

		/*
		 * Start from an all-zero record: fields this function does
		 * not fill in (e.g. priv) and any structure padding must not
		 * carry stale kernel stack contents out to the reader.
		 */
		memset(&p, 0, sizeof(p));

		p.pid        = tsk->pid;
		p.ppid       = tsk->p_pptr->pid;
		p.uid        = tsk->uid;
		p.priority   = tsk->priority;
		p.start_time = tsk->start_time;
		p.utime      = tsk->times.tms_utime;
		p.stime      = tsk->times.tms_stime;
		p.minflt     = tsk->min_flt;
		p.majflt     = tsk->maj_flt;
		/* A task without an address space reports zero memory usage
		 * instead of dereferencing a NULL mm. */
		if (tsk->mm) {
			p.rss = tsk->mm->rss << PAGE_SHIFT;
			p.vm  = tsk->mm->total_vm << PAGE_SHIFT;
		}
		p.rd_bytes   = tsk->rd_bytes;
		p.wr_bytes   = tsk->wr_bytes;

		memcpy (p.cmd, tsk->comm, 16);

		memcpy (buffer + len, &p, sizeof(struct cis_procinfo));
		len += sizeof(struct cis_procinfo);
		if (len >= length)
			break;
	}

	/*
	 * The buffer holds the file range [pos - len, pos); `begin` is where
	 * `offset` falls inside it.  If offset lies past the last record the
	 * subtraction below goes negative and is clamped to 0 (end of file).
	 */
	begin = len - (pos - offset);
	*start = buffer + begin;
	len -= begin;
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}

/*
 * Proc entry "cis_proclist": a read-only regular file whose contents are
 * produced by read_proclist().  It is registered under proc_root by
 * procmon_init() and removed by cleanup_module().  Fields are given
 * positionally; members after the read function are left zero.
 */
static struct proc_dir_entry proc_root_proclist = {
        0,                               /* inode number (0 here) */
        sizeof("cis_proclist") - 1,      /* length of the name below (12) */
        "cis_proclist",                  /* file name */
        S_IFREG | S_IRUGO,               /* regular file, readable by all */
        1,                               /* number of links */
        0,                               /* owner */
        0,                               /* group */
        0,                               /* size reported by ls */
        NULL,                            /* operations - use default */
        read_proclist,                   /* read function */
};

/*
 * Broadcast one process event over the procmon netlink socket.
 *
 * The message payload is a single struct cis_procinfo.  `event` is stored
 * in the priv field (cast to a pointer) and `pid` in the pid field.  The
 * remaining fields are filled from the task found by find_task_by_pid();
 * if no such task exists (for example the process has already been reaped
 * when the wait wrappers report it) they are left zero.
 *
 *   pid   - process the event refers to
 *   event - event code (OBJ_CREATE, OBJ_CHANGE, OBJ_DESTROY in this file)
 *
 * Returns 0 once the message has been handed to netlink_broadcast(),
 * -1 if the socket buffer could not be allocated.
 */
static int procmon_msg(int pid, unsigned char event)
{
	struct cis_procinfo *msg;
	struct task_struct *tsk;
	struct sk_buff *outskb;

	outskb = alloc_skb(sizeof(struct cis_procinfo), GFP_ATOMIC);
	if (!outskb)
		return (-1);

	skb_put(outskb, sizeof(struct cis_procinfo));
	msg = (struct cis_procinfo *) outskb->data;

	/*
	 * The skb data area is not initialized by the allocator.  Clear it so
	 * that fields we do not set (all of them except priv/pid when the task
	 * is gone) do not leak stale kernel memory to netlink listeners.
	 */
	memset(msg, 0, sizeof(struct cis_procinfo));

	msg->priv = (void *) (int) event;
	msg->pid  = pid;

	tsk = find_task_by_pid (pid);
	if (tsk) {
		msg->ppid       = tsk->p_pptr->pid;
		msg->uid        = tsk->uid;
		msg->priority   = tsk->priority;
		msg->start_time = tsk->start_time;
		msg->utime      = tsk->times.tms_utime;
		msg->stime      = tsk->times.tms_stime;
		msg->minflt     = tsk->min_flt;
		msg->majflt     = tsk->maj_flt;
		/* Do not dereference a missing address space; report zero. */
		if (tsk->mm) {
			msg->rss = tsk->mm->rss << PAGE_SHIFT;
			msg->vm  = tsk->mm->total_vm << PAGE_SHIFT;
		}
		msg->rd_bytes   = tsk->rd_bytes;
		msg->wr_bytes   = tsk->wr_bytes;
		memcpy (msg->cmd, tsk->comm, 16);
	}

	netlink_broadcast(pmsk, outskb, 0, ~0, GFP_KERNEL);

	return (0);
}

/*
 * New handlers.
 */

/*
 * exit wrapper: announce OBJ_DESTROY for the calling process, then hand
 * over to the saved original handler with the same exit code.
 */
asmlinkage int new_exit(int code)
{
	int self = current->pid;

	procmon_msg(self, OBJ_DESTROY);
	return old_exit(code);
}

/*
 * fork wrapper: run the saved original handler and, when it returns a
 * positive pid, announce OBJ_CREATE for that pid.  The original return
 * value is passed through unchanged.
 */
asmlinkage int new_fork(struct pt_regs regs)
{
	int child = old_fork(regs);

	if (child <= 0)
		return child;

	procmon_msg(child, OBJ_CREATE);
	return child;
}

/*
 * execve replacement.
 *
 * Unlike the other hooks this one does not chain to the saved handler:
 * calling the original sys_execve through a pointer made the executed
 * processes die with SIGSEGV (the original author did not establish why),
 * so the body of the system call is duplicated here instead.  It is still
 * nicer than issuing the call via __asm__ volatile ("int $0x80"...).
 * Original code taken from arch/i386/process.c.
 *
 * On success (return value 0) an OBJ_CHANGE event is sent for the calling
 * process.  Returns whatever getname()/do_execve() reported.
 */
asmlinkage int new_execve(struct pt_regs regs)
{
	char *path;
	int rc;

	lock_kernel();
	path = getname((char *) regs.ebx);
	if (IS_ERR(path)) {
		/* Could not fetch the file name; report that error. */
		rc = PTR_ERR(path);
	} else {
		rc = do_execve(path, (char **) regs.ecx, (char **) regs.edx, &regs);
		if (rc == 0)
			current->flags &= ~PF_DTRACE;
		putname(path);
	}
	unlock_kernel();

	if (rc == 0)
		procmon_msg(current->pid, OBJ_CHANGE);

	return rc;
}

/*
 * clone wrapper: run the saved original handler and, when it returns a
 * positive pid, announce OBJ_CREATE for that pid.  The original return
 * value is passed through unchanged.
 */
asmlinkage int new_clone(struct pt_regs regs)
{
	int child = old_clone(regs);

	if (child <= 0)
		return child;

	procmon_msg(child, OBJ_CREATE);
	return child;
}

/*
 * vfork wrapper: run the saved original handler and, when it returns a
 * positive pid, announce OBJ_CREATE for that pid.  The original return
 * value is passed through unchanged.
 */
asmlinkage int new_vfork(struct pt_regs regs)
{
	int child = old_vfork(regs);

	if (child <= 0)
		return child;

	procmon_msg(child, OBJ_CREATE);
	return child;
}

/*
 * waitpid wrapper: after the saved original handler has returned a pid,
 * send OBJ_DESTROY for it if the reported status says the child was
 * terminated by a signal (__WIFSIGNALED).
 *
 *   pid, stat_addr, options - passed unchanged to the original handler;
 *                             stat_addr is a user-space pointer.
 *
 * Returns the original handler's return value.
 *
 * The status word is read back with get_user() because stat_addr points
 * into user space and must not be dereferenced directly; if the read
 * faults, no event is sent.  When the caller passes a NULL stat_addr the
 * status is not available here and no event is sent either.
 *
 * The module use count is held for the duration of the call.
 */
asmlinkage int new_waitpid(pid_t pid,unsigned int * stat_addr, int options)
{
	unsigned int status = 0;
	int err;

	MOD_INC_USE_COUNT;

	err = old_waitpid(pid, stat_addr, options);
	if (err > 0 && stat_addr &&
	    get_user(status, stat_addr) == 0 && __WIFSIGNALED(status))
		procmon_msg(err, OBJ_DESTROY);

	MOD_DEC_USE_COUNT;

	return err;
}

/*
 * wait4 wrapper: after the saved original handler has returned a pid,
 * send OBJ_DESTROY for it if the reported status says the child was
 * terminated by a signal (__WIFSIGNALED).
 *
 *   pid, stat_addr, options, ru - passed unchanged to the original
 *                                 handler; stat_addr is a user-space pointer.
 *
 * Returns the original handler's return value.
 *
 * The status word is read back with get_user() because stat_addr points
 * into user space and must not be dereferenced directly; if the read
 * faults, no event is sent.  When the caller passes a NULL stat_addr the
 * status is not available here and no event is sent either.
 *
 * The module use count is held for the duration of the call.
 */
asmlinkage int new_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
	unsigned int status = 0;
	int err;

	MOD_INC_USE_COUNT;

	err = old_wait4(pid, stat_addr, options, ru);
	if (err > 0 && stat_addr &&
	    get_user(status, stat_addr) == 0 && __WIFSIGNALED(status))
		procmon_msg(err, OBJ_DESTROY);

	MOD_DEC_USE_COUNT;

	return err;
}

/* Copy of ext2's default file operations taken by procmon_init() before it
 * replaces the read/write entries; new_ext2_read()/new_ext2_write() chain to
 * the saved entries and cleanup_module() restores them. */
struct file_operations old_ext2_file_ops;

/*
 * ext2 read hook: perform the read through the saved original operation
 * and add the number of bytes actually read to the calling task's
 * rd_bytes counter.  Errors and zero-length reads are not counted.
 * Returns the original operation's result unchanged.
 */
ssize_t new_ext2_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
{
	ssize_t nread;

	nread = old_ext2_file_ops.read (filp, buf, count, ppos);
	if (nread <= 0)
		return nread;

	current->rd_bytes += nread;
	return nread;
}

/*
 * ext2 write hook: perform the write through the saved original operation
 * and add the number of bytes actually written to the calling task's
 * wr_bytes counter.  Errors and zero-length writes are not counted.
 * Returns the original operation's result unchanged.
 */
ssize_t new_ext2_write(struct file * filp, const char * buf, size_t count, loff_t *ppos)
{
	ssize_t nwritten;

	nwritten = old_ext2_file_ops.write (filp, buf, count, ppos);
	if (nwritten <= 0)
		return nwritten;

	current->wr_bytes += nwritten;
	return nwritten;
}

/*
 * Set up process monitoring:
 *   1. create the kernel-side NETLINK_PROCMON socket,
 *   2. register the cis_proclist proc entry,
 *   3. save the current exit/fork/execve/clone/vfork/waitpid/wait4 entries
 *      of sys_call_table and install the new_* wrappers,
 *   4. save ext2's default file operations and hook its read/write.
 *
 * Returns 0 on success, -ENODEV if the netlink socket cannot be created,
 * or the error from proc_register().  On failure nothing is left
 * allocated or hooked.
 */
int procmon_init(void)
{
	int err;

	pmsk = netlink_kernel_create(NETLINK_PROCMON, NULL);
	if (pmsk == NULL) {
		printk("procmon_init: cannot initialize netlink\n");
		return -ENODEV;
	}

	err = proc_register(&proc_root, &proc_root_proclist);
	if (err) {
		printk("procmon_init: cannot register proclist\n");
		/* Module load is about to fail, so cleanup_module() will not
		 * run: release the netlink socket here instead of leaking it. */
		sock_release(pmsk->socket);
		pmsk = NULL;
		return err;
	}

	old_exit = sys_call_table[SYS_exit];
	sys_call_table[SYS_exit] = new_exit;

	old_fork = sys_call_table[SYS_fork];
	sys_call_table[SYS_fork] = new_fork;

	old_execve = sys_call_table[SYS_execve];
	sys_call_table[SYS_execve] = new_execve;

	old_clone = sys_call_table[SYS_clone];
	sys_call_table[SYS_clone] = new_clone;

	old_vfork = sys_call_table[SYS_vfork];
	sys_call_table[SYS_vfork] = new_vfork;

	old_waitpid = sys_call_table[SYS_waitpid];
	sys_call_table[SYS_waitpid] = new_waitpid;

	old_wait4 = sys_call_table[SYS_wait4];
	sys_call_table[SYS_wait4] = new_wait4;

	/* Keep a full copy of ext2's file operations, then redirect read and
	 * write through the byte-counting hooks. */
	old_ext2_file_ops = *ext2_file_inode_operations.default_file_ops;
	ext2_file_inode_operations.default_file_ops->read = new_ext2_read;
	ext2_file_inode_operations.default_file_ops->write = new_ext2_write;

	return 0;
}

/*
 * Module entry point: all set-up work is done by procmon_init(), whose
 * result is returned as the module load status.
 */
int init_module(void)
{
	int status = procmon_init ();

	return status;
}

/*
 * Put the saved handler back into sys_call_table slot `nr`, but only if
 * the slot still holds our wrapper; a slot that has since been changed by
 * someone else is left alone.
 */
#define PROCMON_RESTORE(nr, ours, saved)			\
	do {							\
		if (sys_call_table[nr] == ours)			\
			sys_call_table[nr] = saved;		\
	} while (0)

/*
 * Module exit point: undo procmon_init() -- restore the system call table
 * entries and ext2's read/write operations, remove the proc entry and
 * release the netlink socket.
 */
void cleanup_module(void)
{
	PROCMON_RESTORE(SYS_exit,    new_exit,    old_exit);
	PROCMON_RESTORE(SYS_fork,    new_fork,    old_fork);
	PROCMON_RESTORE(SYS_execve,  new_execve,  old_execve);
	PROCMON_RESTORE(SYS_clone,   new_clone,   old_clone);
	PROCMON_RESTORE(SYS_vfork,   new_vfork,   old_vfork);
	PROCMON_RESTORE(SYS_waitpid, new_waitpid, old_waitpid);
	PROCMON_RESTORE(SYS_wait4,   new_wait4,   old_wait4);

	/* The ext2 hooks are removed unconditionally. */
	ext2_file_inode_operations.default_file_ops->read  = old_ext2_file_ops.read;
	ext2_file_inode_operations.default_file_ops->write = old_ext2_file_ops.write;

	proc_unregister(&proc_root, proc_root_proclist.low_ino);
	sock_release (pmsk->socket);
}

#undef PROCMON_RESTORE

                