/* Lower half of IP, consisting of gateway routines
 * Includes routing and options processing code
 */
#include "global.h"
#include "mbuf.h"
#include "internet.h"
#include "timer.h"
#include "netuser.h"
#include "ip.h"
#include "icmp.h"
#include "iface.h"
#include "trace.h"
/* NOTE(review): the header name that followed the #include below is missing
 * from this copy of the file (apparently lost in extraction); restore it
 * from a pristine copy before building with UNIX defined.
 */
#ifdef	UNIX
#include
#endif

struct route *routes[32][NROUTE];	/* Routing table */
struct route r_default;			/* Default route entry */

int32 ip_addr;
struct ip_stats ip_stats;

#ifndef	GWONLY
struct mbuf *loopq;	/* Queue for loopback packets */
#endif

/* Route an IP datagram. This is the "hopper" through which all IP datagrams,
 * coming or going, must pass.
 *
 * "rxbroadcast" is set to indicate that the packet came in on a subnet
 * broadcast. The router will kick the packet upstairs regardless of the
 * IP destination address.
 *
 * The datagram in "bp" is consumed on every path: it is either handed on
 * (to ip_recv, the loopback queue or an interface send routine) or freed
 * here.
 *
 * Returns 0 when the datagram was delivered locally or completely
 * fragmented and sent, -1 when it was discarded, and otherwise whatever the
 * interface send routine returns for an unfragmented datagram.
 */
int
ip_route(bp,rxbroadcast)
struct mbuf *bp;
char rxbroadcast;	/* True if packet had link broadcast address */
{
	struct mbuf *htonip();
	void ip_recv();
	struct ip ip;			/* IP header being processed */
	int16 ip_len;			/* IP header length */
	int16 length;			/* Length of data portion */
	int32 gateway;			/* Gateway IP address */
	register struct route *rp;	/* Route table entry */
	struct interface *iface;	/* Output interface, possibly forwarded */
	struct route *rt_lookup();
	int16 offset;			/* Offset into current fragment */
	int16 mf_flag;			/* Original datagram MF flag */
	int strict = 0;			/* Strict source routing flag */
	char precedence;		/* Extracted from tos field */
	char delay;
	char throughput;
	char reliability;
	int16 opt_len;			/* Length of current option */
	char *opt;			/* -> beginning of current option */
	int i;
	struct mbuf *tbp;
	int pointer;		/* Relative pointer index for sroute/rroute */

	ip_stats.total++;
	if(len_mbuf(bp) < IPLEN){
		/* The packet is shorter than a legal IP header */
		ip_stats.runt++;
		free_p(bp);
		return -1;
	}
	/* Sneak a peek at the IP header's IHL field to find its length */
	ip_len = (bp->data[0] & 0xf) << 2;
	if(ip_len < IPLEN){
		/* The IP header length field is too small */
		ip_stats.length++;
		free_p(bp);
		return -1;
	}
	if(cksum(NULLHEADER,bp,ip_len) != 0){
		/* Bad IP header checksum; discard */
		ip_stats.checksum++;
		free_p(bp);
		return -1;
	}
	/* Extract IP header */
	if (ntohip(&ip,&bp) < 0) {
		/* ntohip found error - invalid length */
		ip_stats.length++;
		free_p(bp);
		return -1;
	}
	if(ip.version != IPVERSION){
		/* We can't handle this version of IP */
		ip_stats.version++;
		free_p(bp);
		return -1;
	}
	if(ip.length < ip_len){
		/* The IP header length field is too small */
		ip_stats.length++;
		free_p(bp);
		return -1;
	}
	/* Trim data segment if necessary. */
	length = ip.length - ip_len;	/* Length of data portion */
	trim_mbuf(&bp,length);

	/* Process options, if any. Also compute length of secondary IP
	 * header in case fragmentation is needed later
	 */
	strict = 0;
	/* NOTE(review): part of the option loop is missing from this copy of
	 * the file. Everything between "i" and "opt_len" on the next line (the
	 * rest of the loop header, the opening of the option switch and the
	 * start of the source-route case) was lost, which also leaves the
	 * braces below unbalanced. The surviving text is kept verbatim;
	 * restore the gap from a pristine copy rather than by guesswork.
	 */
	for(i=0; i opt_len) break; /* Route exhausted; it's for us */
			/* Put address for next hop into destination field,
			 * put our address into the route field, and bump
			 * the pointer. We've already ensured enough space.
			 */
			ip.dest = get32(&opt[pointer]);
			put32(&opt[pointer],ip_addr);
			opt[2] += 4;
			break;
		case IP_RROUTE:	/* Record route */
			if(opt_len < 3){
				/* Option is too short to be a legal rroute.
				 * Send an ICMP message and drop it.
				 */
				union icmp_args icmp_args;

				icmp_args.pointer = IPLEN + i;
				if(!rxbroadcast)
					icmp_output(&ip,bp,PARAM_PROB,
					 0,&icmp_args);
				free_p(bp);
				return -1;
			}
			pointer = uchar(opt[2]);
			if(pointer + 4 > opt_len){
				/* Route area exhausted; send an ICMP msg */
				union icmp_args icmp_args;

				icmp_args.pointer = IPLEN + i;
				if(!rxbroadcast)
					icmp_output(&ip,bp,PARAM_PROB,
					 0,&icmp_args);
				/* Also drop if odd-sized */
				if(pointer != opt_len){
					free_p(bp);
					return -1;
				}
			} else {
				/* Add our address to the route.
				 * We've already ensured there's enough space.
				 */
				put32(&opt[pointer],ip_addr);
				opt[2] += 4;
			}
			break;
		}
	}
no_opt:

	/* See if it's a broadcast or addressed to us, and kick it upstairs */
	if(ip.dest == ip_addr || (ip_addr == 0 && ip.source) || rxbroadcast){
#ifdef	GWONLY
		/* We're only a gateway, we have no host level protocols */
		if(!rxbroadcast)
			icmp_output(&ip,bp,DEST_UNREACH,PROT_UNREACH,
			 (union icmp_args *)NULL);
		free_p(bp);
#else
		/* If this is a local loopback packet, place on the loopback
		 * queue for processing in the main loop. This prevents the
		 * infinite stack recursion and other problems that would
		 * otherwise occur when we talk to ourselves, e.g., with ftp
		 */
		if(ip.source == ip_addr){
			/* Put IP header back on. htonip() has already freed
			 * the data when it fails, so it must not be freed
			 * a second time here.
			 */
			if((tbp = htonip(&ip,bp,0)) == NULLBUF)
				return -1;
			/* Copy loopback packet into new buffer.
			 * This avoids an obscure problem with TCP which
			 * dups its outgoing data before transmission and
			 * then frees it when an ack comes, even though the
			 * receiver might not have actually read it yet
			 */
			bp = copy_p(tbp,len_mbuf(tbp));
			free_p(tbp);
			if(bp == NULLBUF)
				return -1;
			enqueue(&loopq,bp);
		} else {
			ip_recv(&ip,bp,rxbroadcast);
		}
#endif
		return 0;
	}
	/* Decrement TTL and discard if zero. A datagram that arrives with a
	 * TTL of zero is discarded as well; decrementing it first would wrap
	 * the field and let the datagram through.
	 */
	if(ip.ttl == 0 || --ip.ttl == 0){
		/* Send ICMP "Time Exceeded" message */
		icmp_output(&ip,bp,TIME_EXCEED,0,NULLICMP);
		free_p(bp);
		return -1;
	}
	/* Look up target address in routing table */
	if((rp = rt_lookup(ip.dest)) == NULLROUTE){
		/* No route exists, return unreachable message */
		icmp_output(&ip,bp,DEST_UNREACH,HOST_UNREACH,NULLICMP);
		free_p(bp);
		return -1;
	}
	/* Check for output forwarding and divert if necessary */
	iface = rp->interface;
	if(iface->forw != NULLIF)
		iface = iface->forw;

	/* Find gateway; zero gateway in routing table means "send direct" */
	if(rp->gateway == (int32)0)
		gateway = ip.dest;
	else
		gateway = rp->gateway;

	if(strict && gateway != ip.dest){
		/* Strict source routing requires a direct entry */
		icmp_output(&ip,bp,DEST_UNREACH,ROUTE_FAIL,NULLICMP);
		free_p(bp);
		return -1;
	}
	precedence = PREC(ip.tos);
	delay = ip.tos & DELAY;
	throughput = ip.tos & THRUPUT;
	reliability = ip.tos & RELIABILITY;

	if(ip.length <= iface->mtu){
		/* Datagram smaller than interface MTU; put header
		 * back on and send normally. On failure htonip() has
		 * already freed the data.
		 */
		if((tbp = htonip(&ip,bp,0)) == NULLBUF)
			return -1;
		return (*iface->send)(tbp,iface,gateway,
		 precedence,delay,throughput,reliability);
	}
	/* Fragmentation needed */
	if(ip.fl_offs & DF){
		/* Don't Fragment set; return ICMP message and drop */
		icmp_output(&ip,bp,DEST_UNREACH,FRAG_NEEDED,NULLICMP);
		free_p(bp);
		return -1;
	}
	/* Create fragments */
	offset = (ip.fl_offs & F_OFFSET) << 3;
	mf_flag = ip.fl_offs & MF;	/* Save original MF flag */
	while(length != 0){		/* As long as there's data left */
		int16 fragsize;		/* Size of this fragment's data */
		struct mbuf *f_data;	/* Data portion of fragment */

		/* After the first fragment, should remove those
		 * options that aren't supposed to be copied on fragmentation
		 */
		ip.fl_offs = offset >> 3;
		if(length + ip_len <= iface->mtu){
			/* Last fragment; send all that remains */
			fragsize = length;
			ip.fl_offs |= mf_flag;	/* Pass original MF flag */
		} else {
			/* More to come, so send multiple of 8 bytes */
			fragsize = (iface->mtu - ip_len) & 0xfff8;
			ip.fl_offs |= MF;
		}
		ip.length = fragsize + ip_len;

		/* Move the data fragment into a new, separate mbuf */
		if((f_data = alloc_mbuf(fragsize)) == NULLBUF){
			free_p(bp);
			return -1;
		}
		f_data->cnt = pullup(&bp,f_data->data,fragsize);

		/* Put IP header back on. On failure htonip() has already
		 * freed f_data, so only the rest of the datagram is
		 * released here.
		 */
		if((tbp = htonip(&ip,f_data,0)) == NULLBUF){
			free_p(bp);
			return -1;
		}
		/* and ship it out */
		if((*iface->send)(tbp,iface,gateway,
		 precedence,delay,throughput,reliability) == -1){
			/* Don't leak the part of the datagram that has
			 * not been fragmented yet
			 */
			if(bp != NULLBUF)
				free_p(bp);
			return -1;
		}
		offset += fragsize;
		length -= fragsize;
	}
	return 0;
}

struct rt_cache rt_cache;

/* Add an entry to the IP routing table.
Returns 0 on success, -1 on failure */ int rt_add(target,bits,gateway,metric,interface) int32 target; /* Target IP address prefix */ unsigned int bits; /* Size of target address prefix in bits (0-32) */ int32 gateway; int metric; struct interface *interface; { struct route *rp,**hp,*rt_lookup(); int16 hash_ip(),i; int32 mask; if(interface == NULLIF) return -1; rt_cache.target = 0; /* Flush cache */ /* Zero bits refers to the default route */ if(bits == 0){ rp = &r_default; } else { if(bits > 32) bits = 32; /* Mask off don't-care bits */ mask = 0xffffffff; for(i=31;i >= bits;i--) mask <<= 1; target &= mask; /* Search appropriate chain for existing entry */ for(rp = routes[bits-1][hash_ip(target)];rp != NULLROUTE;rp = rp->next){ if(rp->target == target) break; } } if(rp == NULLROUTE){ /* The target is not already in the table, so create a new * entry and put it in. */ if((rp = (struct route *)malloc(sizeof(struct route))) == NULLROUTE) return -1; /* No space */ /* Insert at head of table */ rp->prev = NULLROUTE; hp = &routes[bits-1][hash_ip(target)]; rp->next = *hp; if(rp->next != NULLROUTE) rp->next->prev = rp; *hp = rp; } rp->target = target; rp->gateway = gateway; rp->metric = metric; rp->interface = interface; return 0; } /* Remove an entry from the IP routing table. Returns 0 on success, -1 * if entry was not in table. 
*/ int rt_drop(target,bits) int32 target; unsigned int bits; { register struct route *rp; struct route *rt_lookup(); unsigned int i; int16 hash_ip(); int32 mask; rt_cache.target = 0; /* Flush the cache */ if(bits == 0){ /* Nail the default entry */ r_default.interface = NULLIF; return 0; } if(bits > 32) bits = 32; /* Mask off don't-care bits */ mask = 0xffffffff; for(i=31;i >= bits;i--) mask <<= 1; target &= mask; /* Search appropriate chain for existing entry */ for(rp = routes[bits-1][hash_ip(target)];rp != NULLROUTE;rp = rp->next){ if(rp->target == target) break; } if(rp == NULLROUTE) return -1; /* Not in table */ if(rp->next != NULLROUTE) rp->next->prev = rp->prev; if(rp->prev != NULLROUTE) rp->prev->next = rp->next; else routes[bits-1][hash_ip(target)] = rp->next; free((char *)rp); return 0; } /* Compute hash function on IP address */ static int16 hash_ip(addr) register int32 addr; { register int16 ret; ret = hiword(addr); ret ^= loword(addr); ret %= NROUTE; return ret; } #ifndef GWONLY /* Given an IP address, return the MTU of the local interface used to * reach that destination. This is used by TCP to avoid local fragmentation */ int16 ip_mtu(addr) int32 addr; { register struct route *rp; struct route *rt_lookup(); struct interface *iface; rp = rt_lookup(addr); if(rp == NULLROUTE || rp->interface == NULLIF) return 0; iface = rp->interface; if(iface->forw != NULLIF) return iface->forw->mtu; else return iface->mtu; } #endif /* Look up target in hash table, matching the entry having the largest number * of leading bits in common. 
Return default route if not found; * if default route not set, return NULLROUTE */ static struct route * rt_lookup(target) int32 target; { register struct route *rp; int16 hash_ip(); int bits; int32 tsave; int32 mask; if(target == rt_cache.target) return rt_cache.route; tsave = target; mask = ~0; /* All ones */ for(bits = 31;bits >= 0; bits--){ target &= mask; for(rp = routes[bits][hash_ip(target)];rp != NULLROUTE;rp = rp->next){ if(rp->target == target){ /* Stash in cache and return */ rt_cache.target = tsave; rt_cache.route = rp; return rp; } } mask <<= 1; } if(r_default.interface != NULLIF){ rt_cache.target = tsave; rt_cache.route = &r_default; return &r_default; } else return NULLROUTE; } /* Convert IP header in host format to network mbuf */ struct mbuf * htonip(ip,data,checksum) struct ip *ip; struct mbuf *data; int checksum; { int16 hdr_len; struct mbuf *bp; register char *cp; hdr_len = IPLEN + ip->optlen; if((bp = pushdown(data,hdr_len)) == NULLBUF){ free_p(data); return NULLBUF; } cp = bp->data; *cp++ = (IPVERSION << 4) | (hdr_len >> 2); *cp++ = ip->tos; cp = put16(cp,ip->length); cp = put16(cp,ip->id); cp = put16(cp,ip->fl_offs); *cp++ = ip->ttl; *cp++ = ip->protocol; if(checksum) /* Use checksum from host structure */ cp = put16(cp,checksum); else cp = put16(cp,0); /* Clear checksum */ cp = put32(cp,ip->source); cp = put32(cp,ip->dest); if(ip->optlen != 0) memcpy(cp,ip->options,(int)ip->optlen); /* Compute checksum and insert into header */ if (! 
checksum) { checksum = cksum(NULLHEADER,bp,hdr_len); put16(&bp->data[10],checksum); } return bp; } /* Extract an IP header from mbuf */ int ntohip(ip,bpp) struct ip *ip; struct mbuf **bpp; { char v_ihl; int16 ihl; int16 optlen; v_ihl = pullchar(bpp); ip->version = (v_ihl >> 4) & 0xf; ip->tos = pullchar(bpp); ip->length = pull16(bpp); ip->id = pull16(bpp); ip->fl_offs = pull16(bpp); ip->ttl = pullchar(bpp); ip->protocol = pullchar(bpp); ip->checksum = pull16(bpp); ip->source = pull32(bpp); ip->dest = pull32(bpp); ihl = (v_ihl & 0xf) << 2; if(ihl < IPLEN){ /* Bogus packet; header is too short */ return -1; } ip->optlen = ihl - IPLEN; if(ip->optlen != 0) { optlen = pullup(bpp,ip->options,ip->optlen); if (optlen != ip->optlen) return -1; } return ip->optlen + IPLEN; } /* Perform end-around-carry adjustment */ int16 eac(sum) register int32 sum; /* Carries in high order 16 bits */ { register int16 csum; while((csum = sum >> 16) != 0) sum = csum + (sum & 0xffffL); return (int16) (sum & 0xffffl); /* Chops to 16 bits */ } /* Checksum a mbuf chain, with optional pseudo-header */ int16 cksum(ph,m,len) struct pseudo_header *ph; register struct mbuf *m; int16 len; { register unsigned int cnt, total; register int32 sum, csum; register char *up; int16 csum1; int swap = 0; int16 lcsum(); sum = 0l; /* Sum pseudo-header, if present */ if(ph != NULLHEADER){ sum = hiword(ph->source); sum += loword(ph->source); sum += hiword(ph->dest); sum += loword(ph->dest); sum += uchar(ph->protocol); sum += ph->length; } /* Now do each mbuf on the chain */ for(total = 0; m != NULLBUF && total < len; m = m->next) { cnt = min(m->cnt, len - total); up = (char *)m->data; csum = 0; if(((long)up) & 1){ /* Handle odd leading byte */ if(swap) csum = uchar(*up++); else csum = (int16)(uchar(*up++) << 8); cnt--; swap = !swap; } if(cnt > 1){ /* Have the primitive checksumming routine do most of * the work. 
At this point, up is guaranteed to be on * a short boundary */ csum1 = lcsum((unsigned short *)up, cnt >> 1); if(swap) csum1 = (csum1 << 8) | (csum1 >> 8); csum += csum1; } /* Handle odd trailing byte */ if(cnt & 1){ if(swap) csum += uchar(up[--cnt]); else csum += (int16)(uchar(up[--cnt]) << 8); swap = !swap; } sum += csum; total += m->cnt; } /* Do final end-around carry, complement and return */ return ~eac(sum) & 0xffff; } /* Machine-independent, alignment insensitive network-to-host long conversion */ static int32 get32(cp) register char *cp; { int32 rval; rval = uchar(*cp++); rval <<= 8; rval |= uchar(*cp++); rval <<= 8; rval |= uchar(*cp++); rval <<= 8; rval |= uchar(*cp); return rval; }