Posts

Showing posts from November, 2016

Search this blog

Ipset walk

1. Init of the module:
hash_mac_init(void)
|
|-> ip_set_type_register(&hash_mac_type);
|
|-> list_add_rcu(&hash_mac_type->list, &ip_set_type_list);


2. static struct ip_set_type hash_mac_type __read_mostly = {
         /* Descriptor of the "hash:mac" set type. hash_mac_init() hands its
          * address to ip_set_type_register(). */
         .name           = "hash:mac",
         .protocol       = IPSET_PROTOCOL,
         .features       = IPSET_TYPE_MAC,
         .dimension      = IPSET_DIM_ONE,
         .family         = NFPROTO_UNSPEC,
         .revision_min   = IPSET_TYPE_REV_MIN,
         .revision_max   = IPSET_TYPE_REV_MAX,
         /* Type-specific create callback. */
         .create         = hash_mac_create,
         /* Netlink attribute types, indexed by attribute id, for the
          * create path (presumably used to validate set-creation
          * requests -- confirm against ip_set_core.c). */
         .create_policy  = {
                 [IPSET_ATTR_HASHSIZE]   = { .type = NLA_U32 },
                 [IPSET_ATTR_MAXELEM]    = { .type = NLA_U32 },
                 [IPSET_ATTR_PROBES]     = { .type = NLA_U8 },
                 [IPSET_ATTR_RESIZE]     = { .type = NLA_U8  },
                 [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
                 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
         },
         /* Netlink attribute types for the add/del/test (ADT) path.
          * The element itself is a binary attribute with .len = ETH_ALEN;
          * the comment string is limited by IPSET_MAX_COMMENT_SIZE. */
         .adt_policy     = {
                 [IPSET_ATTR_ETHER]      = { .type = NLA_BINARY,
                                             .len  = ETH_ALEN },
                 [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
                 [IPSET_ATTR_LINENO]     = { .type = NLA_U32 },
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING,
                                             .len  = IPSET_MAX_COMMENT_SIZE },
                 [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
                 [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
                 [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
 };

3.

1. What is the purpose of HTYPE? It does not appear to be used anywhere else.

  /* Type specific function prefix */
#define HTYPE           hash_mac

  2. What is the purpose of MTYPE?
#define MTYPE           hash_mac4

Most of the functions are generated, and the macros are used in the
included ip_set_hash_gen.h file. HTYPE is used for the IPv4/IPv6-independent
functions, while MTYPE is required for the IPv4- and IPv6-specific
ones.

3. Why are two different functions used, KADT and UADT? I understand that
ADT stands for add, delete, test, and that K and U probably mean kernel space
and user space. But how do the KADT and UADT operations differ?

Those are the kernel and userspace common routines for add, del and test
elements. The functions are separated because the input comes in different
forms: in the userspace case in netlink attributes, while in the kernel
case through the API.

4. Who calls KADT in kernel space? How is UADT called from user space?

The kadt functions are called from the kernel part interfaces (ip_set_add,
etc) while the uadt ones from ip_set_uadd, etc (see ip_set_core.c). Please
note uadt functions are NOT called in userspace: the functions work on
data sent FROM userspace.

4.

Userspace command: ipset

Function ipset_cmd(session, cmd, restore_line);

/**
 * ipset_cmd - execute a command
 * @session: session structure
 * @cmd: command to execute
 * @lineno: command line number in restore mode
 *
 * Execute - or prepare/buffer in restore mode - a command.
 * It is the caller responsibility that the data field be filled out
 * with all required parameters for a successful execution.
 * The data field is cleared after this function call for the public
 * commands.
 *
 * Returns 0 on success or a negative error code.
 */

int ipset_cmd(struct ipset_session *session, enum ipset_cmd cmd, uint32_t lineno)
{
        struct ipset_data *data;
        bool aggregate = false;
        int ret = -1;

        assert(session);

        if (cmd <= IPSET_CMD_NONE || cmd >= IPSET_MSG_MAX)
                return 0;

        /* Initialize transport method if not done yet */
        if (session->handle == NULL && init_transport(session) == NULL)
                return ipset_err(session,
                                 "Cannot open session to kernel.");

        data = session->data;

        /* Check protocol version once */
        if (!session->version_checked) {
                if (build_send_private_msg(session, IPSET_CMD_PROTOCOL) < 0)
                        return -1;
        }
        /* Private commands */
        if (cmd == IPSET_CMD_TYPE || cmd == IPSET_CMD_HEADER)
                return build_send_private_msg(session, cmd);

        /* Check aggregatable commands */
        aggregate = may_aggregate_ad(session, cmd);
        if (!aggregate) {
                /* Flush possible aggregated commands */
                ret = ipset_commit(session);
                if (ret < 0)
                        return ret;
        }

        /* Real command: update lineno too */
        session->cmd = cmd;
        session->lineno = lineno;

        /* Set default output mode */
        if (cmd == IPSET_CMD_LIST) {
                if (session->mode == IPSET_LIST_NONE)
                        session->mode = IPSET_LIST_PLAIN;
        } else if (cmd == IPSET_CMD_SAVE) {
                if (session->mode == IPSET_LIST_NONE)
                        session->mode = IPSET_LIST_SAVE;
        }
        /* Start the root element in XML mode */
        if ((cmd == IPSET_CMD_LIST || cmd == IPSET_CMD_SAVE) &&
            session->mode == IPSET_LIST_XML)
                safe_snprintf(session, "\n");

        D("next: build_msg");
        /* Build new message or append buffered commands */
        ret = build_msg(session, aggregate);
        D("build_msg returned %u", ret);
        if (ret > 0) {
                /* Buffer is full, send buffered commands */
                ret = ipset_commit(session);
                if (ret < 0)
                        goto cleanup;
                ret = build_msg(session, false);
                D("build_msg 2 returned %u", ret);
        }
        if (ret < 0)
                goto cleanup;
        D("past: build_msg");

        /* We have to save the type for error handling */
        session->saved_type = ipset_data_get(data, IPSET_OPT_TYPE);
        if (session->lineno != 0 &&
            (cmd == IPSET_CMD_ADD || cmd == IPSET_CMD_DEL)) {
                /* Save setname for the next possible aggregated restore line */
                strcpy(session->saved_setname, ipset_data_setname(data));
                ipset_data_reset(data);
                /* Don't commit: we may aggregate next command */
                ret = 0;
                goto cleanup;
        }

        D("call commit");
        ret = ipset_commit(session);

cleanup:
        D("reset data");
        ipset_data_reset(data);
        return ret;
}


/* nfnetlink subsystem id used by ipset. */
#define NFNL_SUBSYS_IPSET               6
 
/*
 * nfnetlink subsystem descriptor for ipset: binds the subsystem id to the
 * ip_set_netlink_subsys_cb callback table, which has IPSET_MSG_MAX entries.
 * ip_set_init() passes it to nfnetlink_subsys_register().
 */
static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1875         .name           = "ip_set",
1876         .subsys_id      = NFNL_SUBSYS_IPSET,
1877         .cb_count       = IPSET_MSG_MAX,
1878         .cb             = ip_set_netlink_subsys_cb,
1879 }; 

/*
 * Socket-option "get" registration for PF_INET: requests for SO_IP_SET are
 * served by ip_set_sockfn_get(). ip_set_init() registers this structure
 * with nf_register_sockopt().
 * NOTE(review): get_optmax = SO_IP_SET + 1 suggests a half-open option
 * range covering only SO_IP_SET -- confirm against nf_sockopt semantics.
 */
2000 static struct nf_sockopt_ops so_set __read_mostly = {
2001         .pf             = PF_INET,
2002         .get_optmin     = SO_IP_SET,
2003         .get_optmax     = SO_IP_SET + 1,
2004         .get            = &ip_set_sockfn_get,
2005         .owner          = THIS_MODULE,
2006 };
 
 
/*
 * Abridged excerpt of the module init function: it registers the ipset
 * nfnetlink subsystem, then the sockopt interface, and unregisters the
 * nfnetlink subsystem again if the sockopt registration fails.
 */
static int __init
2055 ip_set_init(void)
2056 {
2057         int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
/* NOTE: listing lines 2058-2062 are omitted from this excerpt. As shown,
 * the result of nfnetlink_subsys_register() is overwritten unchecked;
 * presumably the omitted lines check it (confirm against ip_set_core.c). */
2063         ret = nf_register_sockopt(&so_set);
2064         if (ret != 0) {
2065                 pr_err("SO_SET registry failed: %d\n", ret);
2066                 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2067                 return ret;
2068         }
 
/* NOTE: the remainder of the function is omitted from this excerpt. */
      }
 
 
/*
 * nfnetlink dispatch table for ipset, indexed by IPSET_CMD_* message type.
 * Each entry names the handler (.call), the attribute count and, except
 * for IPSET_CMD_NONE, the attribute policy for that command.
 * LIST and SAVE share ip_set_dump; ADD, DEL and TEST are routed to
 * ip_set_uadd, ip_set_udel and ip_set_utest, all with ip_set_adt_policy.
 */
1802 static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1803         [IPSET_CMD_NONE]        = {
1804                 .call           = ip_set_none,
1805                 .attr_count     = IPSET_ATTR_CMD_MAX,
1806         },
1807         [IPSET_CMD_CREATE]      = {
1808                 .call           = ip_set_create,
1809                 .attr_count     = IPSET_ATTR_CMD_MAX,
1810                 .policy         = ip_set_create_policy,
1811         },
1812         [IPSET_CMD_DESTROY]     = {
1813                 .call           = ip_set_destroy,
1814                 .attr_count     = IPSET_ATTR_CMD_MAX,
1815                 .policy         = ip_set_setname_policy,
1816         },
1817         [IPSET_CMD_FLUSH]       = {
1818                 .call           = ip_set_flush,
1819                 .attr_count     = IPSET_ATTR_CMD_MAX,
1820                 .policy         = ip_set_setname_policy,
1821         },
1822         [IPSET_CMD_RENAME]      = {
1823                 .call           = ip_set_rename,
1824                 .attr_count     = IPSET_ATTR_CMD_MAX,
1825                 .policy         = ip_set_setname2_policy,
1826         },
1827         [IPSET_CMD_SWAP]        = {
1828                 .call           = ip_set_swap,
1829                 .attr_count     = IPSET_ATTR_CMD_MAX,
1830                 .policy         = ip_set_setname2_policy,
1831         },
1832         [IPSET_CMD_LIST]        = {
1833                 .call           = ip_set_dump,
1834                 .attr_count     = IPSET_ATTR_CMD_MAX,
1835                 .policy         = ip_set_setname_policy,
1836         },
1837         [IPSET_CMD_SAVE]        = {
1838                 .call           = ip_set_dump,
1839                 .attr_count     = IPSET_ATTR_CMD_MAX,
1840                 .policy         = ip_set_setname_policy,
1841         },
1842         [IPSET_CMD_ADD] = {
1843                 .call           = ip_set_uadd,
1844                 .attr_count     = IPSET_ATTR_CMD_MAX,
1845                 .policy         = ip_set_adt_policy,
1846         },
1847         [IPSET_CMD_DEL] = {
1848                 .call           = ip_set_udel,
1849                 .attr_count     = IPSET_ATTR_CMD_MAX,
1850                 .policy         = ip_set_adt_policy,
1851         },
1852         [IPSET_CMD_TEST]        = {
1853                 .call           = ip_set_utest,
1854                 .attr_count     = IPSET_ATTR_CMD_MAX,
1855                 .policy         = ip_set_adt_policy,
1856         },
1857         [IPSET_CMD_HEADER]      = {
1858                 .call           = ip_set_header,
1859                 .attr_count     = IPSET_ATTR_CMD_MAX,
1860                 .policy         = ip_set_setname_policy,
1861         },
1862         [IPSET_CMD_TYPE]        = {
1863                 .call           = ip_set_type,
1864                 .attr_count     = IPSET_ATTR_CMD_MAX,
1865                 .policy         = ip_set_type_policy,
1866         },
1867         [IPSET_CMD_PROTOCOL]    = {
1868                 .call           = ip_set_protocol,
1869                 .attr_count     = IPSET_ATTR_CMD_MAX,
1870                 .policy         = ip_set_protocol_policy,
1871         },
1872 }; 


1881 /* Interface to iptables/ip6tables */
1882 
1883 static int
1884 ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1885 {
1886         unsigned int *op;
1887         void *data;
1888         int copylen = *len, ret = 0;
1889         struct net *net = sock_net(sk);
1890         struct ip_set_net *inst = ip_set_pernet(net);
1891 
1892         if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1893                 return -EPERM;
1894         if (optval != SO_IP_SET)
1895                 return -EBADF;
1896         if (*len < sizeof(unsigned int))
1897                 return -EINVAL;
1898 
1899         data = vmalloc(*len);
1900         if (!data)
1901                 return -ENOMEM;
1902         if (copy_from_user(data, user, *len) != 0) {
1903                 ret = -EFAULT;
1904                 goto done;
1905         }
1906         op = (unsigned int *)data;
1907 
1908         if (*op < IP_SET_OP_VERSION) {
1909                 /* Check the version at the beginning of operations */
1910                 struct ip_set_req_version *req_version = data;
1911 
1912                 if (*len < sizeof(struct ip_set_req_version)) {
1913                         ret = -EINVAL;
1914                         goto done;
1915                 }
1916 
1917                 if (req_version->version != IPSET_PROTOCOL) {
1918                         ret = -EPROTO;
1919                         goto done;
1920                 }
1921         }
1922 
1923         switch (*op) {
1924         case IP_SET_OP_VERSION: {
1925                 struct ip_set_req_version *req_version = data;
1926 
1927                 if (*len != sizeof(struct ip_set_req_version)) {
1928                         ret = -EINVAL;
1929                         goto done;
1930                 }
1931 
1932                 req_version->version = IPSET_PROTOCOL;
1933                 ret = copy_to_user(user, req_version,
1934                                    sizeof(struct ip_set_req_version));
1935                 goto done;
1936         }
1937         case IP_SET_OP_GET_BYNAME: {
1938                 struct ip_set_req_get_set *req_get = data;
1939                 ip_set_id_t id;
1940 
1941                 if (*len != sizeof(struct ip_set_req_get_set)) {
1942                         ret = -EINVAL;
1943                         goto done;
1944                 }
1945                 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1946                 nfnl_lock(NFNL_SUBSYS_IPSET);
1947                 find_set_and_id(inst, req_get->set.name, &id);
1948                 req_get->set.index = id;
1949                 nfnl_unlock(NFNL_SUBSYS_IPSET);
1950                 goto copy;
1951         }
1952         case IP_SET_OP_GET_FNAME: {
1953                 struct ip_set_req_get_set_family *req_get = data;
1954                 ip_set_id_t id;
1955 
1956                 if (*len != sizeof(struct ip_set_req_get_set_family)) {
1957                         ret = -EINVAL;
1958                         goto done;
1959                 }
1960                 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1961                 nfnl_lock(NFNL_SUBSYS_IPSET);
1962                 find_set_and_id(inst, req_get->set.name, &id);
1963                 req_get->set.index = id;
1964                 if (id != IPSET_INVALID_ID)
1965                         req_get->family = ip_set(inst, id)->family;
1966                 nfnl_unlock(NFNL_SUBSYS_IPSET);
1967                 goto copy;
1968         }
1969         case IP_SET_OP_GET_BYINDEX: {
1970                 struct ip_set_req_get_set *req_get = data;
1971                 struct ip_set *set;
1972 
1973                 if (*len != sizeof(struct ip_set_req_get_set) ||
1974                     req_get->set.index >= inst->ip_set_max) {
1975                         ret = -EINVAL;
1976                         goto done;
1977                 }
1978                 nfnl_lock(NFNL_SUBSYS_IPSET);
1979                 set = ip_set(inst, req_get->set.index);
1980                 strncpy(req_get->set.name, set ? set->name : "",
1981                         IPSET_MAXNAMELEN);
1982                 nfnl_unlock(NFNL_SUBSYS_IPSET);
1983                 goto copy;
1984         }
1985         default:
1986                 ret = -EBADMSG;
1987                 goto done;
1988         }       /* end of switch(op) */
1989 
1990 copy:
1991         ret = copy_to_user(user, data, copylen);
1992 
1993 done:
1994         vfree(data);
1995         if (ret > 0)
1996                 ret = 0;
1997         return ret;
1998 }
 
What happens when we add an iptables rule that matches against an ipset, i.e. one that uses the set match or the SET target?
 
From iptables, ipset is used via the set match and SET target. The corresponding kernel module
 (net/netfilter/xt_set.c) calls the ipset kernel API functions: 
ip_set_test(), ip_set_add(), ip_set_del(). 


 

Product Buy