Fix Network reconnects/resets

pull/627/head
Connor Rigby 2018-08-16 11:02:04 -07:00
parent c7ce8278a1
commit 65ff3613c9
No known key found for this signature in database
GPG Key ID: 9B7C52AA37F36C18
9 changed files with 225 additions and 110 deletions

View File

@ -73,6 +73,7 @@ config :farmbot, :behaviour,
local_file = Path.join(System.user_home!(), ".ssh/id_rsa.pub")
local_key = if File.exists?(local_file), do: [File.read!(local_file)], else: []
config :nerves_network, regulatory_domain: "US"
config :nerves_firmware_ssh, authorized_keys: local_key
config :shoehorn,

View File

@ -68,6 +68,7 @@ config :farmbot, :behaviour,
pin_binding_handler: Farmbot.Target.PinBinding.AleHandler,
leds_handler: Farmbot.Target.Leds.AleHandler
config :nerves_network, regulatory_domain: "US"
config :shoehorn,
init: [:nerves_runtime, :nerves_firmware_ssh],
handler: Farmbot.ShoehornHandler,

View File

@ -80,6 +80,9 @@ defmodule Farmbot.BotState.Transport.AMQP do
defp open_connection(token, device, mqtt_server, vhost) do
opts = [
client_properties: [
version: Farmbot.Project.version()
],
host: mqtt_server,
username: device,
password: token,

View File

@ -146,6 +146,7 @@ defmodule Farmbot.Mixfile do
{:nerves_firmware, "~> 0.4"},
{:nerves_firmware_ssh, "~> 0.3.3"},
{:nerves_init_gadget, "~> 0.4.0", only: :dev},
{:nerves_time, "~> 0.2.0"},
{:nerves_network, "~> 0.3"},
{:nerves_wpa_supplicant, github: "nerves-project/nerves_wpa_supplicant", override: true},
{:dhcp_server, "~> 0.4.0"},

View File

@ -38,6 +38,7 @@
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
"mime": {:hex, :mime, "1.3.0", "5e8d45a39e95c650900d03f897fbf99ae04f60ab1daa4a34c7a20a5151b7a5fe", [:mix], [], "hexpm"},
"mimerl": {:hex, :mimerl, "1.0.2", "993f9b0e084083405ed8252b99460c4f0563e41729ab42d9074fd5e52439be88", [:rebar3], [], "hexpm"},
"muontrap": {:hex, :muontrap, "0.4.0", "f3c48f5e2cbb89b6406d28e488fbd0da1ce0ca00af332860913999befca9688a", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm"},
"nerves": {:hex, :nerves, "1.1.1", "2fc347fc796c9d0557a68f0da81c3e59c108800dae7f18ed468d7a7e6854c663", [:mix], [{:distillery, "~> 1.4", [hex: :distillery, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
"nerves_firmware": {:hex, :nerves_firmware, "0.4.0", "ac2fed915a7ca4bb69f567d9b742d77cffc3a6a56420ce65e870c8c34119b935", [:mix], [], "hexpm"},
"nerves_firmware_ssh": {:hex, :nerves_firmware_ssh, "0.3.3", "79c42303ddbfd89ae6f5b4b19a4397a6188df21ca0e7a6573c2399e081fb7d25", [:mix], [{:nerves_runtime, "~> 0.4", [hex: :nerves_runtime, repo: "hexpm", optional: false]}], "hexpm"},
@ -49,6 +50,7 @@
"nerves_system_br": {:hex, :nerves_system_br, "1.4.1", "58a85d4dd85c84c7d1b535f9295aae64283638a9d9f49b8279f22ef1673eef42", [:mix], [], "hexpm"},
"nerves_system_farmbot_rpi3": {:hex, :nerves_system_farmbot_rpi3, "1.3.0-farmbot.2", "d8440383466a858c1e993ea27e47f1fd0281624853a59b554718ba0b86a06c52", [:mix], [{:nerves, "~> 1.0", [hex: :nerves, repo: "hexpm", optional: false]}, {:nerves_system_br, "1.4.1", [hex: :nerves_system_br, repo: "hexpm", optional: false]}, {:nerves_system_linter, "~> 0.3.0", [hex: :nerves_system_linter, repo: "hexpm", optional: false]}, {:nerves_toolchain_arm_unknown_linux_gnueabihf, "1.1.0", [hex: :nerves_toolchain_arm_unknown_linux_gnueabihf, repo: "hexpm", optional: false]}], "hexpm"},
"nerves_system_linter": {:hex, :nerves_system_linter, "0.3.0", "84e0f63c8ac196b16b77608bbe7df66dcf352845c4e4fb394bffd2b572025413", [:mix], [], "hexpm"},
"nerves_time": {:hex, :nerves_time, "0.2.0", "c8ae5cc020cd5e5b9f166f614b3dff30e10b25828715743aa97749cbfe0c5c0a", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:muontrap, "~> 0.4", [hex: :muontrap, repo: "hexpm", optional: false]}], "hexpm"},
"nerves_toolchain_arm_unknown_linux_gnueabihf": {:hex, :nerves_toolchain_arm_unknown_linux_gnueabihf, "1.1.0", "ca466a656f8653346a8551a35743f7c41046f3d53e945723e970cb4a7811e617", [:mix], [{:nerves, "~> 1.0", [hex: :nerves, repo: "hexpm", optional: false]}, {:nerves_toolchain_ctng, "~> 1.5.0", [hex: :nerves_toolchain_ctng, repo: "hexpm", optional: false]}], "hexpm"},
"nerves_toolchain_ctng": {:hex, :nerves_toolchain_ctng, "1.5.0", "34b8f5664858ff6ce09730b26221441398acd1fa361b8c6d744d9ec18238c16b", [:mix], [{:nerves, "~> 1.0", [hex: :nerves, repo: "hexpm", optional: false]}], "hexpm"},
"nerves_uart": {:hex, :nerves_uart, "1.2.0", "195424116b925cd3bf9d666be036c2a80655e6ca0f8d447e277667a60005c50e", [:mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm"},

View File

@ -1,13 +1,20 @@
### WARNING(Connor) 2018-08-16
### Do not touch anything in this file unless you understand _exactly_
### what you are doing. If you look at it wrong, you will cause the
### Raspberry pi to kernel panic for some reason. I have
### no idea why. Just move along.
### If you are in this file, please at least be kind enough as to not touch any
### of the timing sensitive things. It _will_ break.
defmodule Farmbot.Target.Network.Manager do
use GenServer
use Farmbot.Logger
alias Farmbot.System.ConfigStorage
import ConfigStorage, only: [get_config_value: 3]
alias Farmbot.Target.Network.Ntp
import Farmbot.System.ConfigStorage, only: [get_config_value: 3]
alias Farmbot.Target.Network.NotFoundTimer
import Farmbot.Target.Network, only: [test_dns: 0]
def debug_logs? do
Application.get_env(:farmbot, :network_debug_logs, true)
Application.get_env(:farmbot, :network_debug_logs, false)
end
def debug_logs(bool) do
@ -33,7 +40,12 @@ defmodule Farmbot.Target.Network.Manager do
Process.sleep(1000)
init(args)
end
Logger.success(3, "Interface #{interface} is up.")
s1 = get_config_value(:string, "settings", "default_ntp_server_1")
s2 = get_config_value(:string, "settings", "default_ntp_server_2")
Nerves.Time.set_ntp_servers([s1, s2])
maybe_hack_tzdata()
settings = Enum.map(opts, fn({key, value}) ->
case key do
@ -49,25 +61,43 @@ defmodule Farmbot.Target.Network.Manager do
domain = node() |> to_string() |> String.split("@") |> List.last() |> Kernel.<>(".local")
init_mdns(domain)
{:ok, %{mdns_domain: domain, interface: interface, opts: settings, ip_address: nil, connected: false, not_found_timer: nil, ntp_timer: nil, dns_timer: nil}}
state = %{
# These won't change
mdns_domain: domain,
interface: interface,
opts: settings,
# These change based on
# Events from timers and other processes.
ip_address: nil,
connected: false,
ap_connected: false,
# Tries to reconnect after "network not found" event.
reconnect_timer: nil,
# Tests internet connectivity.
dns_timer: nil,
}
{:ok, state}
end
# Synchronous query for the interface's current IP address.
# The reply is whatever is stored under `:ip_address` in the state
# (`nil` until an address has been assigned).
def handle_call(:ip, _from, state) do
  current_ip = Map.fetch!(state, :ip_address)
  {:reply, current_ip, state}
end
# When assigned an IP address.
def handle_info({Nerves.Udhcpc, :bound, %{ipv4_address: ip}}, state) do
Logger.debug 3, "Ip address: #{ip}"
NotFoundTimer.stop()
connected = match?({:ok, {:hostent, _, _, :inet, 4, _}}, test_dns())
if connected do
init_mdns(state.mdns_domain)
ntp_timer = restart_ntp_timer(state.ntp_timer)
not_found_timer = cancel_timer(state.not_found_timer)
dns_timer = restart_dns_timer(state.dns_timer, 45_000)
update_mdns(ip, state.mdns_domain)
{:noreply, %{state | dns_timer: dns_timer, ip_address: ip, connected: true, not_found_timer: not_found_timer, ntp_timer: ntp_timer}}
{:noreply, %{state | dns_timer: dns_timer, ip_address: ip, connected: true}}
else
{:noreply, %{state | connected: false, ntp_timer: nil, ip_address: ip}}
{:noreply, %{state | connected: false, ip_address: ip}}
end
end
@ -77,34 +107,39 @@ defmodule Farmbot.Target.Network.Manager do
{:stop, :normal, state}
end
def handle_info({Nerves.WpaSupplicant, :"CTRL-EVENT-NETWORK-NOT-FOUND", _}, %{not_found_timer: nil} = state) do
# stored in minutes
delay_timer = (ConfigStorage.get_config_value(:float, "settings", "network_not_found_timer") || 1) * 60_000
timer = Process.send_after(self(), :network_not_found_timer, round(delay_timer))
Logger.error 1, "Wireless Network not found. Will reset if not connected in #{delay_timer} minute(s)"
{:noreply, %{state | ip_address: nil, not_found_timer: timer, connected: false}}
# wpa_supplicant could not find the configured network: arm the
# "network not found" countdown and clear the connection flags and IP.
def handle_info({Nerves.WpaSupplicant, :"CTRL-EVENT-NETWORK-NOT-FOUND", _info}, state) do
  NotFoundTimer.start()
  disconnected = %{state | connected: false, ip_address: nil, ap_connected: false}
  {:noreply, disconnected}
end
def handle_info({Nerves.WpaSupplicant, :"CTRL-EVENT-CONNECTED", _}, state) do
# Don't update connected. This is not a real test of connectivity.
# Don't update `connected`. This is not a real test of connectivity.
Logger.success 1, "Connected to access point."
{:noreply, state}
NotFoundTimer.stop()
{:noreply, %{state | ap_connected: true}}
end
def handle_info({Nerves.WpaSupplicant, :"CTRL-EVENT-DISCONNECTED", _}, state) do
# stored in minutes
nnft = get_config_value(:float, "settings", "network_not_found_timer") || 1
delay_timer = (nnft) * 60_000
timer = Process.send_after(self(), :network_not_found_timer, round(delay_timer))
Logger.error 1, "Wireless Network not found. Will reset if not connected in #{nnft} minute(s)"
if state.connected do
# TODO(Connor) - 2018-08-15 There is a bug in Nerves.Network
# Where `Nerves.Network.teardown(ifname)` doesn't actually do anything.
Nerves.Network.IFSupervisor.teardown(state.interface)
Process.sleep(5000)
Nerves.Network.setup(state.interface, state.opts)
end
{:noreply, %{state | ip_address: nil, not_found_timer: timer, connected: false}}
reconnect_timer = if state.connected, do: restart_connection_timer(state)
maybe_refresh_token()
NotFoundTimer.start()
new_state = %{state |
ap_connected: false,
connected: false,
ip_address: nil,
reconnect_timer: reconnect_timer
}
{:noreply, new_state}
# if state.connected do
# NotFoundTimer.start()
# Nerves.Network.IFSupervisor.teardown(state.interface)
# Process.sleep(5000)
# {:stop, :reconnect_timer, state}
# else
# # This event can come in for a brief moment while connecting.
# {:noreply, state}
# end
end
def handle_info({Nerves.WpaSupplicant, info, infoa}, state) do
@ -114,54 +149,16 @@ defmodule Farmbot.Target.Network.Manager do
{:noreply, state}
end
def handle_info(:network_not_found_timer, state) do
delay_minutes = (ConfigStorage.get_config_value(:float, "settings", "network_not_found_timer") || 1)
disable_factory_reset? = ConfigStorage.get_config_value(:bool, "settings", "disable_factory_reset")
first_boot? = ConfigStorage.get_config_value(:bool, "settings", "first_boot")
connected? = state.connected
cond do
connected? ->
Logger.warn 1, "Not resetting because network is connected."
{:noreply, %{state | not_found_timer: nil}}
disable_factory_reset? ->
Logger.warn 1, "Factory reset is disabled. Not resettings."
{:stop, :restart, %{state | not_found_timer: nil}}
first_boot? ->
msg = """
Network not found after #{delay_minutes} minute(s).
possible causes of this include:
1) A typo if you manually inputted the SSID.
2) The access point is out of range
3) There is too much radio interference around Farmbot.
5) There is a hardware issue.
"""
Logger.error 1, msg
Farmbot.System.factory_reset(msg)
{:stop, :network_not_found, %{state | not_found_timer: nil}}
true ->
Logger.error 1, "Network not found after timer. Farmbot is disconnected."
msg = """
Network not found after #{delay_minutes} minute(s).
This can happen if your wireless access point is no longer available,
out of range, or there is too much radio interference around Farmbot.
If you see this message intermittently you should disable \"automatic
factory reset\" or tune the \"network not found
timer\" value in the Farmbot Web Application.
"""
Farmbot.System.factory_reset(msg)
# Network.teardown(state.interface)
# Network.setup(state.interface, state.opts)
{:stop, :network_not_found, %{state | not_found_timer: nil}}
end
# The reconnect timer fired while the access point is still not connected.
# Stops this process with reason `:reconnect_timer`.
def handle_info(:reconnect_timer, state = %{ap_connected: false}) do
  Logger.warn 1, "Wireless network not found still. Trying again."
  # Alternative that was left disabled: re-arm the timer and keep running.
  # new_state = %{state | reconnect_timer: restart_connection_timer(state)}
  # {:noreply, new_state}
  {:stop, :reconnect_timer, state}
end
def handle_info(:ntp_timer, state) do
new_timer = restart_ntp_timer(state.ntp_timer)
{:noreply, %{state | ntp_timer: new_timer}}
# The reconnect timer fired and the access point is connected again:
# log the success and carry on with the state unchanged.
def handle_info(:reconnect_timer, state = %{ap_connected: true}) do
  Logger.success 1, "Wireless network reconnected."
  {:noreply, state}
end
def handle_info(:dns_timer, %{connected: true} = state) do
@ -171,14 +168,13 @@ defmodule Farmbot.Target.Network.Manager do
{:noreply, %{state | dns_timer: restart_dns_timer(nil, 45_000)}}
{:error, err} ->
Farmbot.System.Registry.dispatch(:network, :dns_down)
maybe_refresh_token()
Logger.warn 3, "Farmbot was disconnected from the internet: #{inspect err}"
{:noreply, %{state | connected: false, dns_timer: restart_dns_timer(nil, 20_000)}}
end
end
def handle_info(:dns_timer, %{ip_address: nil} = state) do
Farmbot.System.Registry.dispatch(:network, :dns_down)
Logger.warn 3, "Farmbot still disconnected from the internet"
{:noreply, %{state | connected: false, dns_timer: restart_dns_timer(nil, 20_000)}}
end
@ -188,18 +184,16 @@ defmodule Farmbot.Target.Network.Manager do
{:ok, {:hostent, _host_name, aliases, :inet, 4, _}} ->
# If we weren't previously connected, send a log.
Logger.success 3, "Farmbot was reconnected to the internet: #{inspect aliases}"
maybe_refresh_token()
new_state = %{state |
connected: true,
not_found_timer: cancel_timer(state.not_found_timer),
dns_timer: restart_dns_timer(nil, 45_000),
ntp_timer: restart_ntp_timer(state.ntp_timer, 1000)
}
Farmbot.System.Registry.dispatch(:network, :dns_up)
{:noreply, new_state}
{:error, err} ->
Farmbot.System.Registry.dispatch(:network, :dns_down)
Logger.warn 3, "Farmbot was disconnected from the internet: #{inspect err}"
maybe_refresh_token()
{:noreply, %{state | connected: false, dns_timer: restart_dns_timer(nil, 20_000)}}
end
end
@ -223,24 +217,24 @@ defmodule Farmbot.Target.Network.Manager do
Process.send_after(self(), :dns_timer, time)
end
defp restart_ntp_timer(timer, time \\ nil) do
cancel_timer(timer)
# introduce a bit of randomness to avoid dosing ntp servers.
# I don't think this would ever happen but the default ntpd implementation
# does this..
rand = :rand.uniform(5000)
# Tears the interface down, brings it back up, sets the network up again
# with the stored options, and schedules a `:reconnect_timer` message to
# this process 30 seconds later.
#
# Any previously scheduled reconnect timer is cancelled first.
# Returns the timer reference from `Process.send_after/3`.
defp restart_connection_timer(state) do
# TODO(Connor) - 2018-08-15 There is a bug in Nerves.Network
# Where `Nerves.Network.teardown(ifname)` doesn't actually do anything.
cancel_timer(state.reconnect_timer)
Nerves.Network.IFSupervisor.teardown(state.interface)
Nerves.NetworkInterface.ifdown(state.interface)
# Each sleep blocks the calling process for 5 seconds.
# NOTE(review): presumably this gives the interface time to settle
# between ifdown/ifup/setup - confirm before changing the delays.
Process.sleep(5000)
Nerves.NetworkInterface.ifup(state.interface)
Process.sleep(5000)
Nerves.Network.setup(state.interface, state.opts)
Process.send_after(self(), :reconnect_timer, 30_000)
end
case Ntp.set_time() do
# If we Successfully set time, sync again in around 1024 seconds
:ok -> Process.send_after(self(), :ntp_timer, (time || 1024000) + rand)
# If time failed, try again in about 5 minutes.
_ ->
if Farmbot.System.ConfigStorage.get_config_value(:bool, "settings", "first_boot") do
Process.send_after(self(), :ntp_timer, (time || 10_000) + rand)
else
Process.send_after(self(), :ntp_timer, (time || 300000) + rand)
end
# Forces an auth token refresh when a process is registered under
# `Farmbot.Bootstrap.AuthTask`; otherwise only logs a warning.
defp maybe_refresh_token do
  case Process.whereis(Farmbot.Bootstrap.AuthTask) do
    nil -> Logger.warn 1, "AuthTask not running yet"
    _registered -> Farmbot.Bootstrap.AuthTask.force_refresh()
  end
end
@ -272,4 +266,20 @@ defmodule Farmbot.Target.Network.Manager do
|> Enum.map(&String.to_integer/1)
|> List.to_tuple()
end
@fb_data_dir Application.get_env(:farmbot, :data_path)

@doc """
Points tzdata at Farmbot's data directory.

When tzdata is not already using the Farmbot data directory, every entry of
tzdata's `priv` directory is copied into it and the `:tzdata` `:data_dir`
application env is updated to that directory.

Always returns `:ok`; copy failures are logged and do not abort the switch.
"""
def maybe_hack_tzdata do
  case Tzdata.Util.data_dir() do
    @fb_data_dir ->
      :ok

    _ ->
      Logger.debug 3, "Hacking tzdata."
      # Resolved at runtime so the path is the one of the running system,
      # not the one that existed when this module was compiled.
      tzdata_priv = Application.app_dir(:tzdata, "priv")

      for obj <- Path.wildcard(Path.join(tzdata_priv, "*")) do
        # File.cp_r/2 copies the *contents* of a source directory into the
        # destination and cannot copy a regular file onto an existing
        # directory, so every entry gets its own destination path. This
        # keeps the layout of `priv` intact inside the data directory.
        dest = Path.join(@fb_data_dir, Path.basename(obj))

        case File.cp_r(obj, dest) do
          {:ok, _copied} ->
            :ok

          {:error, reason, file} ->
            Logger.warn 3, "Could not copy tzdata file #{file}: #{inspect reason}"
        end
      end

      Application.put_env(:tzdata, :data_dir, @fb_data_dir)
      :ok
  end
end
end

View File

@ -6,6 +6,7 @@ defmodule Farmbot.Target.Network do
import ConfigStorage, only: [get_config_value: 3]
alias ConfigStorage.NetworkInterface
alias Farmbot.Target.Network.Manager, as: NetworkManager
alias Farmbot.Target.Network.NotFoundTimer
alias Farmbot.Target.Network.ScanResult
use Supervisor
@ -173,7 +174,7 @@ defmodule Farmbot.Target.Network do
end
def to_child_spec({interface, opts}) do
worker(NetworkManager, [interface, opts])
worker(NetworkManager, [interface, opts], [restart: :transient])
end
def start_link(_, opts) do
@ -187,6 +188,7 @@ defmodule Farmbot.Target.Network do
|> Enum.map(&to_network_config/1)
|> Enum.map(&to_child_spec/1)
|> Enum.uniq() # Don't know why/if we need this?
children = [{NotFoundTimer, []}] ++ children
Supervisor.init(children, strategy: :one_for_one, max_restarts: 20, max_seconds: 1)
end
end

View File

@ -0,0 +1,93 @@
defmodule Farmbot.Target.Network.NotFoundTimer do
  @moduledoc """
  Countdown that runs while the wireless network cannot be found.

  `start/0` arms a single timer whose length comes from the
  "network_not_found_timer" setting (in minutes, defaulting to 1).
  `stop/0` disarms it. When the timer expires, Farmbot is factory reset
  unless the "disable_factory_reset" setting is enabled.
  """

  use GenServer
  import Farmbot.System.ConfigStorage, only: [get_config_value: 3]
  use Farmbot.Logger

  @doc """
  Returns the result of `Process.read_timer/1` for the armed timer
  (milliseconds remaining), or `nil` when no timer is armed.
  """
  def query do
    GenServer.call(__MODULE__, :query)
  end

  @doc "Arms the timer. Does nothing if it is already armed. Returns `:ok`."
  def start do
    GenServer.call(__MODULE__, :start)
  end

  @doc "Disarms the timer. Does nothing if it is not armed. Returns `:ok`."
  def stop do
    GenServer.call(__MODULE__, :stop)
  end

  @doc "Starts the timer process, registered under the module name."
  def start_link(args) do
    GenServer.start_link(__MODULE__, args, name: __MODULE__)
  end

  @impl true
  def init([]) do
    {:ok, %{timer: nil}}
  end

  @impl true
  def handle_call(:query, _from, %{timer: nil} = state) do
    {:reply, nil, state}
  end

  def handle_call(:query, _from, state) do
    {:reply, Process.read_timer(state.timer), state}
  end

  def handle_call(:start, _from, %{timer: nil} = state) do
    minutes = not_found_minutes()
    ms = round(minutes * 60_000)
    timer = Process.send_after(self(), :timer, ms)
    Logger.debug 1, "Starting network not found timer: #{minutes} minute(s)"
    {:reply, :ok, %{state | timer: timer}}
  end

  # Timer already started
  def handle_call(:start, _from, state) do
    {:reply, :ok, state}
  end

  def handle_call(:stop, _from, state) do
    cancel_timer(state.timer)
    {:reply, :ok, %{state | timer: nil}}
  end

  @impl true
  def handle_info(:timer, state) do
    delay_minutes = not_found_minutes()
    disable_factory_reset? = get_config_value(:bool, "settings", "disable_factory_reset")
    first_boot? = get_config_value(:bool, "settings", "first_boot")
    disarmed = %{state | timer: nil}

    cond do
      disable_factory_reset? ->
        Logger.warn 1, "Factory reset is disabled. Not resetting."
        {:noreply, disarmed}

      first_boot? ->
        msg = """
        Network not found after #{delay_minutes} minute(s).
        possible causes of this include:
        1) A typo if you manually inputted the SSID.
        2) The access point is out of range
        3) There is too much radio interference around Farmbot.
        4) There is a hardware issue.
        """
        Logger.error 1, msg
        Farmbot.System.factory_reset(msg)
        {:stop, :normal, disarmed}

      true ->
        Logger.error 1, "Network not found after timer. Farmbot is disconnected."
        msg = """
        Network not found after #{delay_minutes} minute(s).
        This can happen if your wireless access point is no longer available,
        out of range, or there is too much radio interference around Farmbot.
        If you see this message intermittently you should disable \"automatic
        factory reset\" or tune the \"network not found
        timer\" value in the Farmbot Web Application.
        """
        Farmbot.System.factory_reset(msg)
        {:stop, :normal, disarmed}
    end
  end

  # Configured timer length in minutes; falls back to 1 when unset.
  defp not_found_minutes do
    get_config_value(:float, "settings", "network_not_found_timer") || 1
  end

  # Cancels `timer` and makes sure its message can no longer be handled.
  defp cancel_timer(nil), do: :ok

  defp cancel_timer(timer) do
    # `Process.cancel_timer/1` returns `false` when the timer has already
    # expired. Its `:timer` message may then be waiting in the mailbox and
    # would trigger a factory reset even though the caller asked to stop,
    # so drop it here.
    if Process.cancel_timer(timer) == false do
      receive do
        :timer -> :ok
      after
        0 -> :ok
      end
    end

    :ok
  end
end

View File

@ -14,8 +14,13 @@ defmodule Farmbot.Target.SSHConsole do
port = get_config_value(:float, "settings", "ssh_port") |> round()
authorized_key = get_config_value(:string, "settings", "authorized_ssh_key")
decoded_authorized_key = do_decode(authorized_key)
ssh = start_ssh(port, decoded_authorized_key)
{:ok, %{ssh: ssh}}
case start_ssh(port, decoded_authorized_key) do
{:ok, ssh} ->
{:ok, %{ssh: ssh}}
_ ->
Logger.warn 1, "Could not start SSH."
:ignore
end
end
@ -33,15 +38,12 @@ defmodule Farmbot.Target.SSHConsole do
# Reuse the system_dir as well to allow for auth to work with the shared
# keys.
{:ok, ssh} =
:ssh.daemon(port, [
{:id_string, :random},
{:key_cb, {Nerves.Firmware.SSH.Keys, cb_opts}},
{:system_dir, Nerves.Firmware.SSH.Application.system_dir()},
{:shell, {Elixir.IEx, :start, []}}
])
ssh
:ssh.daemon(port, [
{:id_string, :random},
{:key_cb, {Nerves.Firmware.SSH.Keys, cb_opts}},
{:system_dir, Nerves.Firmware.SSH.Application.system_dir()},
{:shell, {Elixir.IEx, :start, []}}
])
end
defp do_decode(nil), do: []