2020-02-06 06:16:58 +08:00
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
2020-05-01 04:20:09 +08:00
package router
2020-02-06 06:16:58 +08:00
import (
"bytes"
2020-05-01 13:34:49 +08:00
"errors"
2020-02-06 06:16:58 +08:00
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
2020-03-04 04:38:51 +08:00
"github.com/coreos/go-iptables/iptables"
2020-02-06 06:16:58 +08:00
"github.com/tailscale/wireguard-go/device"
"github.com/tailscale/wireguard-go/tun"
2020-05-08 13:17:30 +08:00
"inet.af/netaddr"
2020-02-06 06:16:58 +08:00
"tailscale.com/atomicfile"
2020-02-15 11:23:16 +08:00
"tailscale.com/types/logger"
2020-02-06 06:16:58 +08:00
)
2020-05-02 09:55:38 +08:00
// The following bits are added to packet marks for Tailscale use.
//
// We tried to pick bits sufficiently out of the way that they're
// unlikely to collide with existing uses. We have 4 bytes of mark
// bits to play with. We leave the lower byte alone on the assumption
// that sysadmins would use those. Kubernetes uses a few bits in the
// second byte, so we steer clear of that too.
//
// Empirically, most of the documentation on packet marks on the
// internet gives the impression that the marks are 16 bits
// wide. Based on this, we theorize that the upper two bytes are
// relatively unused in the wild, and so we consume bits starting at
// the 17th.
//
// The constants are in the iptables/iproute2 string format for
// matching and setting the bits, so they can be directly embedded in
// commands.
const (
// tailscaleSubnetRouteMark is the 17th mark bit, written as
// "value/mask". Packet is from Tailscale and to a subnet route
// destination, so is allowed to be routed through this machine.
tailscaleSubnetRouteMark = "0x10000/0x10000"
// tailscaleBypassMark is the 18th mark bit, written as
// "value/mask". Packet was originated by tailscaled itself, and
// must not be routed over the Tailscale network.
tailscaleBypassMark = "0x20000/0x20000"
)
2020-02-06 06:16:58 +08:00
type linuxRouter struct {
2020-05-02 09:55:38 +08:00
logf func ( fmt string , args ... interface { } )
tunname string
2020-05-08 13:17:30 +08:00
addrs map [ netaddr . IPPrefix ] bool
routes map [ netaddr . IPPrefix ] bool
subnetRoutes map [ netaddr . IPPrefix ] bool
2020-03-04 04:38:51 +08:00
ipt4 * iptables . IPTables
2020-02-06 06:16:58 +08:00
}
2020-02-18 01:00:38 +08:00
func newUserspaceRouter ( logf logger . Logf , _ * device . Device , tunDev tun . Device ) ( Router , error ) {
2020-02-15 07:03:25 +08:00
tunname , err := tunDev . Name ( )
if err != nil {
return nil , err
}
2020-03-04 04:38:51 +08:00
ipt4 , err := iptables . NewWithProtocol ( iptables . ProtocolIPv4 )
if err != nil {
return nil , err
}
2020-02-15 07:03:25 +08:00
return & linuxRouter {
2020-02-18 01:00:38 +08:00
logf : logf ,
tunname : tunname ,
2020-03-04 04:38:51 +08:00
ipt4 : ipt4 ,
2020-02-15 07:03:25 +08:00
} , nil
2020-02-06 06:16:58 +08:00
}
2020-05-01 13:34:49 +08:00
// cmd runs the program args[0] with the arguments args[1:] and waits
// for it to exit.
//
// It returns nil on success. If args is empty it returns an error
// without running anything. If the program cannot be started or exits
// unsuccessfully, the returned error contains the full command line
// and the combined stdout/stderr output, and wraps the underlying
// error so that callers can inspect it with errors.Is / errors.As
// (for example to extract an *exec.ExitError).
func cmd(args ...string) error {
	if len(args) == 0 {
		return errors.New("cmd: no argv[0]")
	}
	out, err := exec.Command(args[0], args[1:]...).CombinedOutput()
	if err != nil {
		// %w rather than %v: same message text, but keeps the error chain.
		return fmt.Errorf("running %q failed: %w\n%s", strings.Join(args, " "), err, out)
	}
	return nil
}
func ( r * linuxRouter ) Up ( ) error {
2020-05-02 09:55:38 +08:00
if err := r . deleteLegacyNetfilter ( ) ; err != nil {
2020-05-01 13:34:49 +08:00
return err
2020-02-06 06:16:58 +08:00
}
2020-05-02 09:55:38 +08:00
if err := r . addBaseNetfilter4 ( ) ; err != nil {
return err
}
if err := r . addBypassRule ( ) ; err != nil {
2020-05-01 13:34:49 +08:00
return err
2020-02-06 06:16:58 +08:00
}
2020-05-02 09:55:38 +08:00
if err := r . upInterface ( ) ; err != nil {
return err
}
return nil
}
2020-05-01 13:34:49 +08:00
2020-05-02 09:55:38 +08:00
func ( r * linuxRouter ) down ( ) error {
if err := r . downInterface ( ) ; err != nil {
2020-05-01 13:34:49 +08:00
return err
2020-02-06 06:16:58 +08:00
}
2020-05-02 09:55:38 +08:00
if err := r . delBypassRule ( ) ; err != nil {
return err
}
if err := r . delNetfilter4 ( ) ; err != nil {
return err
}
2020-05-08 08:18:18 +08:00
r . addrs = nil
r . routes = nil
r . subnetRoutes = nil
2020-05-02 09:55:38 +08:00
2020-02-06 06:16:58 +08:00
return nil
}
2020-05-02 09:55:38 +08:00
// Close restores the system resolv.conf and then tears the router
// down. Both steps are always attempted. A resolv.conf failure is
// logged and takes precedence in the return value; otherwise the
// error from the teardown, if any, is returned.
func (r *linuxRouter) Close() error {
	restoreErr := r.restoreResolvConf()
	if restoreErr != nil {
		r.logf("failed to restore system resolv.conf: %v", restoreErr)
	}

	downErr := r.down()
	if restoreErr != nil {
		return restoreErr
	}
	return downErr
}
2020-05-08 09:07:13 +08:00
// Set implements the Router interface.
func ( r * linuxRouter ) Set ( rs Settings ) error {
// cidrDiff calls add and del as needed to make the set of prefixes in
// old and new match. Returns a map version of new, and the first
// error encountered while reconfiguring, if any.
2020-05-08 13:17:30 +08:00
cidrDiff := func ( kind string , old map [ netaddr . IPPrefix ] bool , new [ ] netaddr . IPPrefix , add , del func ( netaddr . IPPrefix ) error ) ( map [ netaddr . IPPrefix ] bool , error ) {
2020-05-08 09:07:13 +08:00
var (
2020-05-08 13:17:30 +08:00
ret = make ( map [ netaddr . IPPrefix ] bool , len ( new ) )
2020-05-08 09:07:13 +08:00
errq error
)
for _ , cidr := range new {
ret [ cidr ] = true
2020-05-08 08:18:18 +08:00
}
2020-05-08 09:07:13 +08:00
for cidr := range old {
if ret [ cidr ] {
continue
2020-02-06 06:16:58 +08:00
}
2020-05-08 09:07:13 +08:00
if err := del ( cidr ) ; err != nil {
r . logf ( "%s del failed: %v" , kind , err )
if errq == nil {
errq = err
}
2020-02-06 06:16:58 +08:00
}
}
2020-05-08 09:07:13 +08:00
for cidr := range ret {
if old [ cidr ] {
continue
2020-02-06 06:16:58 +08:00
}
2020-05-08 09:07:13 +08:00
if err := add ( cidr ) ; err != nil {
r . logf ( "%s add failed: %v" , kind , err )
if errq == nil {
errq = err
}
2020-05-02 09:55:38 +08:00
}
}
2020-05-08 09:07:13 +08:00
return ret , errq
2020-05-02 09:55:38 +08:00
}
2020-05-08 09:07:13 +08:00
var errq error
newAddrs , err := cidrDiff ( "addr" , r . addrs , rs . LocalAddrs , r . addAddress , r . delAddress )
if err != nil && errq == nil {
errq = err
2020-05-02 09:55:38 +08:00
}
2020-05-08 09:07:13 +08:00
newRoutes , err := cidrDiff ( "route" , r . routes , rs . Routes , r . addRoute , r . delRoute )
if err != nil && errq == nil {
errq = err
2020-05-02 09:55:38 +08:00
}
2020-05-08 09:07:13 +08:00
newSubnetRoutes , err := cidrDiff ( "subnet rule" , r . subnetRoutes , rs . SubnetRoutes , r . addSubnetRule , r . delSubnetRule )
if err != nil && errq == nil {
errq = err
2020-02-06 06:16:58 +08:00
}
2020-05-08 08:18:18 +08:00
r . addrs = newAddrs
2020-02-06 06:16:58 +08:00
r . routes = newRoutes
2020-05-02 09:55:38 +08:00
r . subnetRoutes = newSubnetRoutes
2020-02-06 06:16:58 +08:00
2020-02-12 07:21:24 +08:00
// TODO: this:
2020-02-06 06:16:58 +08:00
if false {
if err := r . replaceResolvConf ( rs . DNS , rs . DNSDomains ) ; err != nil {
errq = fmt . Errorf ( "replacing resolv.conf failed: %v" , err )
}
}
return errq
}
// Paths used when taking over and restoring the system resolver
// configuration.
const (
// tsConf is the resolv.conf file generated by Tailscale; while
// Tailscale's DNS settings are active, resolvConf is a symlink to it.
tsConf = "/etc/resolv.tailscale.conf"
// backupConf holds whatever resolvConf was before Tailscale replaced
// it: either a copy of the file, or a symlink with the same target.
backupConf = "/etc/resolv.pre-tailscale-backup.conf"
// resolvConf is the system resolver configuration that gets replaced.
resolvConf = "/etc/resolv.conf"
)
2020-05-08 13:17:30 +08:00
func ( r * linuxRouter ) replaceResolvConf ( servers [ ] netaddr . IP , domains [ ] string ) error {
2020-02-06 06:16:58 +08:00
if len ( servers ) == 0 {
return r . restoreResolvConf ( )
}
// First write the tsConf file.
buf := new ( bytes . Buffer )
fmt . Fprintf ( buf , "# resolv.conf(5) file generated by tailscale\n" )
fmt . Fprintf ( buf , "# DO NOT EDIT THIS FILE BY HAND -- CHANGES WILL BE OVERWRITTEN\n\n" )
for _ , ns := range servers {
fmt . Fprintf ( buf , "nameserver %s\n" , ns )
}
if len ( domains ) > 0 {
fmt . Fprintf ( buf , "search " + strings . Join ( domains , " " ) + "\n" )
}
f , err := ioutil . TempFile ( filepath . Dir ( tsConf ) , filepath . Base ( tsConf ) + ".*" )
if err != nil {
return err
}
f . Close ( )
if err := atomicfile . WriteFile ( f . Name ( ) , buf . Bytes ( ) , 0644 ) ; err != nil {
return err
}
os . Chmod ( f . Name ( ) , 0644 ) // ioutil.TempFile creates the file with 0600
if err := os . Rename ( f . Name ( ) , tsConf ) ; err != nil {
return err
}
if linkPath , err := os . Readlink ( resolvConf ) ; err != nil {
// Remove any old backup that may exist.
os . Remove ( backupConf )
// Backup the existing /etc/resolv.conf file.
contents , err := ioutil . ReadFile ( resolvConf )
if os . IsNotExist ( err ) {
2020-02-22 07:27:21 +08:00
// No existing /etc/resolv.conf file to backup.
2020-02-06 06:16:58 +08:00
// Nothing to do.
return nil
} else if err != nil {
return err
}
if err := atomicfile . WriteFile ( backupConf , contents , 0644 ) ; err != nil {
return err
}
} else if linkPath != tsConf {
// Backup the existing symlink.
os . Remove ( backupConf )
if err := os . Symlink ( linkPath , backupConf ) ; err != nil {
return err
}
} else {
// Nothing to do, resolvConf already points to tsConf.
return nil
}
os . Remove ( resolvConf )
if err := os . Symlink ( tsConf , resolvConf ) ; err != nil {
return nil
}
out , _ := exec . Command ( "service" , "systemd-resolved" , "restart" ) . CombinedOutput ( )
if len ( out ) > 0 {
r . logf ( "service systemd-resolved restart: %s" , out )
}
return nil
}
func ( r * linuxRouter ) restoreResolvConf ( ) error {
if _ , err := os . Stat ( backupConf ) ; err != nil {
if os . IsNotExist ( err ) {
2020-02-22 07:27:21 +08:00
return nil // no backup resolv.conf to restore
2020-02-06 06:16:58 +08:00
}
return err
}
if ln , err := os . Readlink ( resolvConf ) ; err != nil {
return err
} else if ln != tsConf {
2020-02-22 07:27:21 +08:00
return fmt . Errorf ( "resolv.conf is not a symlink to %s" , tsConf )
2020-02-06 06:16:58 +08:00
}
if err := os . Rename ( backupConf , resolvConf ) ; err != nil {
return err
}
os . Remove ( tsConf ) // best effort removal of tsConf file
out , _ := exec . Command ( "service" , "systemd-resolved" , "restart" ) . CombinedOutput ( )
if len ( out ) > 0 {
r . logf ( "service systemd-resolved restart: %s" , out )
}
return nil
}
2020-05-02 09:55:38 +08:00
// addAddress adds an IP/mask to the tunnel interface. Fails if the
// address is already assigned to the interface, or if the addition
// fails.
2020-05-08 13:17:30 +08:00
func ( r * linuxRouter ) addAddress ( addr netaddr . IPPrefix ) error {
2020-05-02 09:55:38 +08:00
return cmd ( "ip" , "addr" , "add" , addr . String ( ) , "dev" , r . tunname )
}
// delAddress removes an IP/mask from the tunnel interface. Fails if
// the address is not assigned to the interface, or if the removal
// fails.
2020-05-08 13:17:30 +08:00
func ( r * linuxRouter ) delAddress ( addr netaddr . IPPrefix ) error {
2020-05-02 09:55:38 +08:00
return cmd ( "ip" , "addr" , "del" , addr . String ( ) , "dev" , r . tunname )
}
// normalizeCIDR returns cidr as an ip/mask string, with the host bits
// of the IP address zeroed out.
2020-05-08 13:17:30 +08:00
func normalizeCIDR ( cidr netaddr . IPPrefix ) string {
2020-05-02 09:55:38 +08:00
ncidr := cidr . IPNet ( )
nip := ncidr . IP . Mask ( ncidr . Mask )
2020-05-08 13:17:30 +08:00
return fmt . Sprintf ( "%s/%d" , nip , cidr . Bits )
2020-05-02 09:55:38 +08:00
}
2020-05-08 03:22:50 +08:00
// addRoute adds a route for cidr, pointing to the tunnel
// interface. Fails if the route already exists, or if adding the
2020-05-02 09:55:38 +08:00
// route fails.
2020-05-08 13:17:30 +08:00
func ( r * linuxRouter ) addRoute ( cidr netaddr . IPPrefix ) error {
2020-05-08 03:22:50 +08:00
return cmd ( "ip" , "route" , "add" , normalizeCIDR ( cidr ) , "dev" , r . tunname , "scope" , "global" )
2020-05-02 09:55:38 +08:00
}
2020-05-08 03:22:50 +08:00
// delRoute removes the route for cidr pointing to the tunnel
// interface. Fails if the route doesn't exist, or if removing the
// route fails.
2020-05-08 13:17:30 +08:00
func ( r * linuxRouter ) delRoute ( cidr netaddr . IPPrefix ) error {
2020-05-08 03:22:50 +08:00
return cmd ( "ip" , "route" , "del" , normalizeCIDR ( cidr ) , "dev" , r . tunname , "scope" , "global" )
2020-05-02 09:55:38 +08:00
}
// addSubnetRule adds a netfilter rule that allows traffic to flow
// from Tailscale to cidr. Fails if the rule already exists, or if
// adding the route fails.
2020-05-08 13:17:30 +08:00
func ( r * linuxRouter ) addSubnetRule ( cidr netaddr . IPPrefix ) error {
2020-05-02 09:55:38 +08:00
if err := r . ipt4 . Insert ( "filter" , "ts-forward" , 1 , "-i" , r . tunname , "-d" , normalizeCIDR ( cidr ) , "-j" , "MARK" , "--set-mark" , tailscaleSubnetRouteMark ) ; err != nil {
return err
}
if err := r . ipt4 . Insert ( "filter" , "ts-forward" , 1 , "-o" , r . tunname , "-s" , normalizeCIDR ( cidr ) , "-j" , "ACCEPT" ) ; err != nil {
return err
}
return nil
}
// delSubnetRule deletes the netfilter subnet forwarding rule for
// cidr. Fails if the rule doesn't exist, or if removing the rule
// fails.
2020-05-08 13:17:30 +08:00
func ( r * linuxRouter ) delSubnetRule ( cidr netaddr . IPPrefix ) error {
2020-05-02 09:55:38 +08:00
if err := r . ipt4 . Delete ( "filter" , "ts-forward" , "-i" , r . tunname , "-d" , normalizeCIDR ( cidr ) , "-j" , "MARK" , "--set-mark" , tailscaleSubnetRouteMark ) ; err != nil {
return err
}
if err := r . ipt4 . Delete ( "filter" , "ts-forward" , "-o" , r . tunname , "-s" , normalizeCIDR ( cidr ) , "-j" , "ACCEPT" ) ; err != nil {
return err
}
return nil
}
// upInterface sets the tunnel interface administratively up and puts
// it in interface group 10000, the Tailscale interface group.
func (r *linuxRouter) upInterface() error {
	argv := []string{"ip", "link", "set", "dev", r.tunname, "group", "10000", "up"}
	return cmd(argv...)
}
// downInterface sets the tunnel interface administratively down and
// moves it back to interface group 0, the default group.
func (r *linuxRouter) downInterface() error {
	argv := []string{"ip", "link", "set", "dev", r.tunname, "group", "0", "down"}
	return cmd(argv...)
}
// addBypassRule installs the policy routing rule that keeps
// tailscaled's own traffic from looping back into the tunnel:
// packets carrying the bypass mark are looked up in the main table
// with routes through interface group 10000 suppressed. Any existing
// rule at priority 10000 that looks Tailscale-managed is removed
// first; if that removal fails, nothing is added.
func (r *linuxRouter) addBypassRule() error {
	if err := r.delBypassRule(); err != nil {
		return err
	}
	argv := []string{
		"ip", "rule", "add",
		"fwmark", tailscaleBypassMark,
		"priority", "10000",
		"table", "main",
		"suppress_ifgroup", "10000",
	}
	return cmd(argv...)
}
// delBypassRule removes the policy routing rule that avoids
// tailscaled routing loops, if it exists.
//
// It lists the rules at priority 10000. If there are none it returns
// nil. If the listing does not mention Tailscale's bypass fwmark, it
// refuses to delete anything and returns an error. Otherwise it
// deletes the rule at that priority. A failure to list the rules is
// reported with the command line and its output, wrapping the
// underlying error.
func (r *linuxRouter) delBypassRule() error {
	listArgs := []string{"ip", "rule", "list", "priority", "10000"}
	out, err := exec.Command(listArgs[0], listArgs[1:]...).CombinedOutput()
	if err != nil {
		// Same error shape as cmd, so the failing command and its
		// output are visible to whoever reads the error.
		return fmt.Errorf("running %q failed: %w\n%s", strings.Join(listArgs, " "), err, out)
	}
	if len(out) == 0 {
		// No rule exists.
		return nil
	}
	// Approximate sanity check that the rule we're about to delete
	// looks like one that handles Tailscale's fwmark.
	if !bytes.Contains(out, []byte(" fwmark "+tailscaleBypassMark)) {
		return fmt.Errorf("ip rule 10000 doesn't look like a Tailscale policy rule: %q", string(out))
	}
	return cmd("ip", "rule", "del", "priority", "10000")
}
// deleteLegacyNetfilter removes the netfilter rules installed by
// older versions of Tailscale, if they exist: the "tailscale"
// commented ACCEPT rule for the tunnel interface in filter/FORWARD,
// and the "tailscale" commented MASQUERADE rule for eth0 in
// nat/POSTROUTING. Rules that are absent are skipped. It returns the
// first error from checking for or deleting a rule.
func (r *linuxRouter) deleteLegacyNetfilter() error {
	legacy := []struct {
		table, chain string
		spec         []string
	}{
		{"filter", "FORWARD", []string{"-m", "comment", "--comment", "tailscale", "-i", r.tunname, "-j", "ACCEPT"}},
		{"nat", "POSTROUTING", []string{"-m", "comment", "--comment", "tailscale", "-o", "eth0", "-j", "MASQUERADE"}},
	}

	for _, rule := range legacy {
		present, err := r.ipt4.Exists(rule.table, rule.chain, rule.spec...)
		if err != nil {
			return err
		}
		if !present {
			continue
		}
		if err := r.ipt4.Delete(rule.table, rule.chain, rule.spec...); err != nil {
			return err
		}
	}
	return nil
}
// delNetfilter4 removes custom Tailscale chains and processing
// hooks from netfilter.
//
// For each of filter/INPUT, filter/FORWARD and nat/POSTROUTING it
// deletes the jump into the matching "ts-" chain if present, then
// flushes and deletes that chain if it exists. It returns the first
// error encountered.
func (r *linuxRouter) delNetfilter4() error {
	del := func(table, chain string) error {
		tsChain := "ts-" + strings.ToLower(chain)

		// Unhook first: a chain that is still referenced cannot be
		// deleted.
		hooked, err := r.ipt4.Exists(table, chain, "-j", tsChain)
		if err != nil {
			return err
		}
		if hooked {
			if err := r.ipt4.Delete(table, chain, "-j", tsChain); err != nil {
				return err
			}
		}

		chains, err := r.ipt4.ListChains(table)
		if err != nil {
			return err
		}
		for _, name := range chains {
			if name != tsChain {
				continue
			}
			// The chain still holds the rules appended when it was
			// set up, and a chain must be empty before it can be
			// deleted, so flush it first.
			if err := r.ipt4.ClearChain(table, tsChain); err != nil {
				return err
			}
			return r.ipt4.DeleteChain(table, tsChain)
		}
		return nil
	}

	if err := del("filter", "INPUT"); err != nil {
		return err
	}
	if err := del("filter", "FORWARD"); err != nil {
		return err
	}
	if err := del("nat", "POSTROUTING"); err != nil {
		return err
	}
	return nil
}
2020-05-08 02:30:37 +08:00
// chromeOSVMRange is the subset of the CGNAT IPv4 range used by
// ChromeOS to interconnect the host OS to containers and VMs. We
// avoid allocating Tailscale IPs from it, to avoid conflicts.
//
// addBaseNetfilter4 uses it to exempt traffic from this range from
// the rule that drops CGNAT-sourced packets arriving on interfaces
// other than the tunnel.
const chromeOSVMRange = "100.115.92.0/23"
2020-05-02 09:55:38 +08:00
// addBaseNetfilter4 installs the basic IPv4 netfilter framework for
// Tailscale, in preparation for inserting more rules later.
func ( r * linuxRouter ) addBaseNetfilter4 ( ) error {
// Create our own filtering chains, and hook them into the head of
// various main tables. If the hooks already exist, we don't try
// to fight for first place, because other software does the
// same. We're happy with "someplace up before most other stuff".
divert := func ( table , chain string ) error {
tsChain := "ts-" + strings . ToLower ( chain )
chains , err := r . ipt4 . ListChains ( table )
if err != nil {
return err
}
found := false
for _ , chain := range chains {
if chain == tsChain {
found = true
break
}
}
if found {
err = r . ipt4 . ClearChain ( table , tsChain )
} else {
err = r . ipt4 . NewChain ( table , tsChain )
}
if err != nil {
return err
}
args := [ ] string { "-j" , tsChain }
exists , err := r . ipt4 . Exists ( table , chain , args ... )
if err != nil {
return err
}
if ! exists {
return r . ipt4 . Insert ( table , chain , 1 , args ... )
}
return nil
}
if err := divert ( "filter" , "INPUT" ) ; err != nil {
return err
}
if err := divert ( "filter" , "FORWARD" ) ; err != nil {
return err
}
if err := divert ( "nat" , "POSTROUTING" ) ; err != nil {
return err
}
// Only allow CGNAT range traffic to come from tailscale0. There
// is an exception carved out for ranges used by ChromeOS, for
// which we fall out of the Tailscale chain.
//
// Note, this will definitely break nodes that end up using the
// CGNAT range for other purposes :(.
2020-05-08 02:30:37 +08:00
if err := r . ipt4 . Append ( "filter" , "ts-input" , "!" , "-i" , r . tunname , "-s" , chromeOSVMRange , "-m" , "comment" , "--comment" , "ChromeOS VM connectivity" , "-j" , "RETURN" ) ; err != nil {
2020-05-02 09:55:38 +08:00
return err
}
if err := r . ipt4 . Append ( "filter" , "ts-input" , "!" , "-i" , r . tunname , "-s" , "100.64.0.0/10" , "-j" , "DROP" ) ; err != nil {
return err
}
// Forward and masquerade packets that have the Tailscale subnet
// route bit set. The bit gets set by rules inserted into
// filter/FORWARD later on. We use packet marks here so both
// filter/FORWARD and nat/POSTROUTING can match on these packets
// of interest.
2020-05-08 02:30:37 +08:00
//
// In particular, we only want to apply masquerading in
// nat/POSTROUTING to packets that originated from the Tailscale
// interface, but we can't match on the inbound interface in
// POSTROUTING. So instead, we match on the inbound interface and
// destination IP in filter/FORWARD, and set a packet mark that
// nat/POSTROUTING can use to effectively run that same test
// again.
2020-05-02 09:55:38 +08:00
if err := r . ipt4 . Append ( "filter" , "ts-forward" , "-m" , "mark" , "--mark" , tailscaleSubnetRouteMark , "-j" , "ACCEPT" ) ; err != nil {
return err
}
if err := r . ipt4 . Append ( "filter" , "ts-forward" , "-i" , r . tunname , "-j" , "DROP" ) ; err != nil {
return err
}
// TODO(danderson): this should be optional.
if err := r . ipt4 . Append ( "nat" , "ts-postrouting" , "-m" , "mark" , "--mark" , tailscaleSubnetRouteMark , "-j" , "MASQUERADE" ) ; err != nil {
return err
}
return nil
}