-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path2018122702.html
More file actions
1 lines (1 loc) · 45 KB
/
2018122702.html
File metadata and controls
1 lines (1 loc) · 45 KB
1
<!DOCTYPE html><html class="theme-next mist use-motion" lang="zh-Hans"><head><meta name="generator" content="Hexo 3.9.0"><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1"><meta name="theme-color" content="#222"><script src="/lib/pace/pace.min.js?v=1.0.2"></script><link href="/lib/pace/pace-theme-minimal.min.css?v=1.0.2" rel="stylesheet"><meta http-equiv="Cache-Control" content="no-transform"><meta http-equiv="Cache-Control" content="no-siteapp"><link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css"><link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css"><link href="/css/main.css?v=5.1.3" rel="stylesheet" type="text/css"><link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-240x240-playpi.png?v=5.1.3"><link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-playpi.png?v=5.1.3"><link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-playpi.png?v=5.1.3"><link rel="mask-icon" href="/images/logo-playpi.svg?v=5.1.3" color="#222"><meta name="keywords" content="Hadoop,Spark,HDFS,nameNode,standby"><link rel="alternate" href="/atom.xml" title="虾丸派" type="application/atom+xml"><meta name="description" content="今天查看日志发现,以前正常运行的 Spark 程序会不断抛出异常:1org.apache.hadoop.ipc.RemoteException (org.apache.hadoop.ipc.StandbyException): Operation category READ is not supported in state standby但是却没有影响到功能的正常运行,只不过是抛出了大量的上述异"><meta name="keywords" content="Hadoop,Spark,HDFS,nameNode,standby"><meta property="og:type" content="article"><meta property="og:title" content="HDFS 异常之 READ is not supported in state standby"><meta property="og:url" content="https://www.playpi.org/2018122702.html"><meta property="og:site_name" content="虾丸派"><meta property="og:description" content="今天查看日志发现,以前正常运行的 Spark 程序会不断抛出异常:1org.apache.hadoop.ipc.RemoteException 
(org.apache.hadoop.ipc.StandbyException): Operation category READ is not supported in state standby但是却没有影响到功能的正常运行,只不过是抛出了大量的上述异"><meta property="og:locale" content="zh-Hans"><meta property="og:image" content="https://raw.githubusercontent.com/iplaypi/img-playpi/master/img/old/b7f2e3a3gy1fyluqlzruwj20ln0b6mxq.jpg"><meta property="og:updated_time" content="2018-12-27T11:06:42.000Z"><meta name="twitter:card" content="summary"><meta name="twitter:title" content="HDFS 异常之 READ is not supported in state standby"><meta name="twitter:description" content="今天查看日志发现,以前正常运行的 Spark 程序会不断抛出异常:1org.apache.hadoop.ipc.RemoteException (org.apache.hadoop.ipc.StandbyException): Operation category READ is not supported in state standby但是却没有影响到功能的正常运行,只不过是抛出了大量的上述异"><meta name="twitter:image" content="https://raw.githubusercontent.com/iplaypi/img-playpi/master/img/old/b7f2e3a3gy1fyluqlzruwj20ln0b6mxq.jpg"><script type="text/javascript" id="hexo.configurations">var NexT=window.NexT||{},CONFIG={root:"/",scheme:"Mist",version:"5.1.3",sidebar:{position:"left",display:"hide",offset:12,b2t:!1,scrollpercent:!0,onmobile:!1},fancybox:!0,tabs:!0,motion:{enable:!0,async:!1,transition:{post_block:"fadeIn",post_header:"slideDownIn",post_body:"slideDownIn",coll_header:"slideLeftIn",sidebar:"slideUpIn"}},duoshuo:{userId:"0",author:"博主"},algolia:{applicationID:"",apiKey:"",indexName:"",hits:{per_page:10},labels:{input_placeholder:"Search for Posts",hits_empty:"We didn't find any results for the search: ${query}",hits_stats:"${hits} results found in ${time} ms"}}}</script><link rel="canonical" href="https://www.playpi.org/2018122702.html"><title>HDFS 异常之 READ is not supported in state standby | 虾丸派</title></head><body itemscope itemtype="http://schema.org/WebPage" lang="zh-Hans"><div class="container sidebar-position-left page-post-detail"><div class="headband"></div><header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader"><div class="header-inner"><div 
class="site-brand-wrapper"><div class="site-meta"><div class="custom-logo-site-title"><a href="/" class="brand" rel="start"><span class="logo-line-before"><i></i></span> <span class="site-title">虾丸派</span> <span class="logo-line-after"><i></i></span></a></div><h1 class="site-subtitle" itemprop="description">烂笔头</h1></div><div class="site-nav-toggle"><button><span class="btn-bar"></span> <span class="btn-bar"></span> <span class="btn-bar"></span></button></div></div><nav class="site-nav"><ul id="menu" class="menu"><li class="menu-item menu-item-home"><a href="/" rel="section"><i class="menu-item-icon fa fa-fw fa-home"></i><br>首页</a></li><li class="menu-item menu-item-tags"><a href="/tags/" rel="section"><i class="menu-item-icon fa fa-fw fa-tags"></i><br>标签</a></li><li class="menu-item menu-item-categories"><a href="/categories/" rel="section"><i class="menu-item-icon fa fa-fw fa-th"></i><br>分类</a></li><li class="menu-item menu-item-archives"><a href="/archives/" rel="section"><i class="menu-item-icon fa fa-fw fa-archive"></i><br>归档</a></li><li class="menu-item menu-item-about"><a href="/about/" rel="section"><i class="menu-item-icon fa fa-fw fa-user"></i><br>关于</a></li><li class="menu-item menu-item-books"><a href="/books/" rel="section"><i class="menu-item-icon fa fa-fw fa-book"></i><br>书籍</a></li><li class="menu-item menu-item-guide"><a href="/guide/" rel="section"><i class="menu-item-icon fa fa-fw fa-location-arrow"></i><br>指南</a></li><li class="menu-item menu-item-search"><a href="javascript:;" class="popup-trigger"><i class="menu-item-icon fa fa-search fa-fw"></i><br>搜索</a></li></ul><div class="site-search"><div class="popup search-popup local-search-popup"><div class="local-search-header clearfix"><span class="search-icon"><i class="fa fa-search"></i> </span><span class="popup-btn-close"><i class="fa fa-times-circle"></i></span><div class="local-search-input-wrapper"><input autocomplete="off" placeholder="搜索..." 
spellcheck="false" type="text" id="local-search-input"></div></div><div id="local-search-result"></div></div></div></nav></div></header><main id="main" class="main"><div class="main-inner"><div class="content-wrap"><div id="content" class="content"><div id="posts" class="posts-expand"><article class="post post-type-normal" itemscope itemtype="http://schema.org/Article"><div class="post-block"><link itemprop="mainEntityOfPage" href="https://www.playpi.org/2018122702.html"><span hidden itemprop="author" itemscope itemtype="http://schema.org/Person"><meta itemprop="name" content="虾丸派"><meta itemprop="description" content="记录知识 | 分享技术"><meta itemprop="image" content="/images/favicon-1536x1536-playpi.png"></span><span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization"><meta itemprop="name" content="虾丸派"></span><header class="post-header"><h2 class="post-title" itemprop="name headline">HDFS 异常之 READ is not supported in state standby</h2><div class="post-meta"><span class="post-time"><span class="post-meta-item-text">发表于</span> <time title="创建于" itemprop="dateCreated datePublished" datetime="2018-12-27T19:06:42+08:00">2018-12-27 </time></span><span class="post-category"><span class="post-meta-divider">|</span> <span class="post-meta-item-text">分类于</span> <span itemprop="about" itemscope itemtype="http://schema.org/Thing"><a href="/categories/series-of-hadoop/" itemprop="url" rel="index"><span itemprop="name">Hadoop 从零基础到入门系列</span> </a></span></span><span id="busuanzi_container_page_pv" style="display:none"><span class="post-meta-divider">|</span> 阅读次数 <span id="busuanzi_value_page_pv"></span></span><div class="post-wordcount"><span class="post-meta-item-text">字数统计</span> <span title="字数统计">2,069字 </span><span class="post-meta-divider">|</span> <span class="post-meta-item-text">阅读时长 ≈</span> <span title="阅读时长">9分钟</span></div></div></header><div class="post-body" itemprop="articleBody"><p>今天查看日志发现,以前正常运行的 Spark 程序会不断抛出异常:<br></p><figure 
class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">org.apache.hadoop.ipc.RemoteException (org.apache.hadoop.ipc.StandbyException): Operation category READ is not supported in state standby</span><br></pre></td></tr></table></figure><p></p><p>但是却没有影响到功能的正常运行,只不过是抛出了大量的上述异常,而且内容都一样,也都是操作 HDFS 产生的,所以猜测与 HDFS 集群(或者配置)有关系。本文就记录发现问题、解决问题的过程。</p><a id="more"></a><h1 id="问题出现"><a href="#问题出现" class="headerlink" title="问题出现"></a>问题出现</h1><p>按照日常操作,查看 Spark 任务的 Driver 端的日志,结果发现了大量的重复异常,又看了一下对功能的影响,结果发现没有影响,所有功能均正常运行,产生的结果也是期望的。</p><h2 id="问题分析"><a href="#问题分析" class="headerlink" title="问题分析"></a>问题分析</h2><p>详细来看一下 Driver 端的日志异常信息:</p><figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br></pre></td><td class="code"><pre><span class="line">2018-12-26_23:25:40 [main] INFO retry.RetryInvocationHandler:140: Exception 
while invoking getFileInfo of class ClientNamenodeProtocolTranslatorPB over hadoop1/192.168.10.162:8020. Trying to fail over immediately.</span><br><span class="line">org.apache.hadoop.ipc.RemoteException (org.apache.hadoop.ipc.StandbyException): Operation category READ is not supported in state standby</span><br><span class="line"> at org.apache.hadoop.hdfs.server.namenode.ha.StandbyState.checkOperation (StandbyState.java:<span class="number">87</span>)</span><br><span class="line"> at org.apache.hadoop.hdfs.server.namenode.NameNode$NameNodeHAContext.checkOperation (NameNode.java:<span class="number">1722</span>)</span><br><span class="line"> at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkOperation (FSNamesystem.java:<span class="number">1362</span>)</span><br><span class="line"> at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getFileInfo (FSNamesystem.java:<span class="number">4414</span>)</span><br><span class="line"> at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getFileInfo (NameNodeRpcServer.java:<span class="number">893</span>)</span><br><span class="line"> at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getFileInfo (ClientNamenodeProtocolServerSideTranslatorPB.java:<span class="number">835</span>)</span><br><span class="line"> at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$<span class="number">2</span>.callBlockingMethod (ClientNamenodeProtocolProtos.java)</span><br><span class="line"> at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call (ProtobufRpcEngine.java:<span class="number">619</span>)</span><br><span class="line"> at org.apache.hadoop.ipc.RPC$Server.call (RPC.java:<span class="number">962</span>)</span><br><span class="line"> at org.apache.hadoop.ipc.Server$Handler$<span class="number">1</span>.run (Server.java:<span class="number">2039</span>)</span><br><span class="line"> at 
org.apache.hadoop.ipc.Server$Handler$<span class="number">1</span>.run (Server.java:<span class="number">2035</span>)</span><br><span class="line"> at java.security.AccessController.doPrivileged (Native Method)</span><br><span class="line"> at javax.security.auth.Subject.doAs (Subject.java:<span class="number">422</span>)</span><br><span class="line"> at org.apache.hadoop.security.UserGroupInformation.doAs (UserGroupInformation.java:<span class="number">1628</span>)</span><br><span class="line"> at org.apache.hadoop.ipc.Server$Handler.run (Server.java:<span class="number">2033</span>)</span><br><span class="line"> at org.apache.hadoop.ipc.Client.call (Client.java:<span class="number">1468</span>)</span><br><span class="line"> at org.apache.hadoop.ipc.Client.call (Client.java:<span class="number">1399</span>)</span><br><span class="line"> at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke (ProtobufRpcEngine.java:<span class="number">232</span>)</span><br><span class="line"> at com.sun.proxy.$Proxy30.getFileInfo (Unknown Source)</span><br><span class="line"> at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo (ClientNamenodeProtocolTranslatorPB.java:<span class="number">768</span>)</span><br><span class="line"> at sun.reflect.GeneratedMethodAccessor34.invoke (Unknown Source)</span><br><span class="line"> at sun.reflect.DelegatingMethodAccessorImpl.invoke (DelegatingMethodAccessorImpl.java:<span class="number">43</span>)</span><br><span class="line"> at java.lang.reflect.Method.invoke (Method.java:<span class="number">498</span>)</span><br><span class="line"> at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod (RetryInvocationHandler.java:<span class="number">187</span>)</span><br><span class="line"> at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke (RetryInvocationHandler.java:<span class="number">102</span>)</span><br><span class="line"> at com.sun.proxy.$Proxy31.getFileInfo (Unknown 
Source)</span><br><span class="line"> at org.apache.hadoop.hdfs.DFSClient.getFileInfo (DFSClient.java:<span class="number">2007</span>)</span><br><span class="line"> at org.apache.hadoop.hdfs.DistributedFileSystem$<span class="number">19</span>.doCall (DistributedFileSystem.java:<span class="number">1136</span>)</span><br><span class="line"> at org.apache.hadoop.hdfs.DistributedFileSystem$<span class="number">19</span>.doCall (DistributedFileSystem.java:<span class="number">1132</span>)</span><br><span class="line"> at org.apache.hadoop.fs.FileSystemLinkResolver.resolve (FileSystemLinkResolver.java:<span class="number">81</span>)</span><br><span class="line"> at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus (DistributedFileSystem.java:<span class="number">1132</span>)</span><br><span class="line"> at org.apache.hadoop.fs.FileSystem.isFile (FileSystem.java:<span class="number">1426</span>)</span><br></pre></td></tr></table></figure><p>注意一下核心异常所在:<br></p><figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">Exception while invoking getFileInfo of class ClientNamenodeProtocolTranslatorPB over hadoop1/192.168.10.162:8020. 
Trying to fail over immediately.</span><br><span class="line">org.apache.hadoop.ipc.RemoteException (org.apache.hadoop.ipc.StandbyException): Operation category READ is not supported in state standby</span><br></pre></td></tr></table></figure><p></p><p>当去从 hadoop1/192.168.10.162:8020 这里 getFileInfo 的时候,抛出了异常,而且明确告诉我们这台机器处于 standby 状态,不支持读取操作。此时,可以想到,肯定是 hadoop1/192.168.10.162:8020 这台机器已经处于 standby 状态了,无法提供服务,所以抛出此异常。既然问题找到了,那么问题产生的原因是什么呢,以及为什么对功能没有影响,接下来一一分析。</p><p>首先查看 hdfs-site.xml 配置文件,看看 namenode 相关的配置项:</p><figure class="highlight xml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br></pre></td><td class="code"><pre><span class="line"><span class="tag"><<span class="name">property</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">name</span>></span>dfs.nameservices<span class="tag"></<span class="name">name</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">value</span>></span>r-cluster<span class="tag"></<span class="name">value</span>></span></span><br><span class="line"><span class="tag"></<span class="name">property</span>></span></span><br><span class="line"></span><br><span class="line"><span class="tag"><<span class="name">property</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">name</span>></span>dfs.ha.namenodes.r-cluster<span class="tag"></<span 
class="name">name</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">value</span>></span>nn1,nn2<span class="tag"></<span class="name">value</span>></span></span><br><span class="line"><span class="tag"></<span class="name">property</span>></span></span><br><span class="line"></span><br><span class="line"><span class="tag"><<span class="name">property</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">name</span>></span>dfs.namenode.rpc-address.r-cluster.nn1<span class="tag"></<span class="name">name</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">value</span>></span>hadoop1:8020<span class="tag"></<span class="name">value</span>></span></span><br><span class="line"><span class="tag"></<span class="name">property</span>></span></span><br><span class="line"></span><br><span class="line"><span class="tag"><<span class="name">property</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">name</span>></span>dfs.namenode.rpc-address.r-cluster.nn2<span class="tag"></<span class="name">name</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">value</span>></span>rocket15:8020<span class="tag"></<span class="name">value</span>></span></span><br><span class="line"><span class="tag"></<span class="name">property</span>></span></span><br></pre></td></tr></table></figure><p>可以看到,namenode 相关配置有 2 台机器:nn1、nn2,而上述产生异常的信息表明连接 nn1 被拒绝,那么我去看一下 HDFS 集群的状态,发现 nn1 果然是 standby 状态的,而 nn2(rocket15) 才是 active 状态。</p><p><img src="https://raw.githubusercontent.com/iplaypi/img-playpi/master/img/old/b7f2e3a3gy1fyluqlzruwj20ln0b6mxq.jpg" alt="nn2 的 active 状态" title="nn2 的 active 状态"></p><p>再仔细查看日志,没有发现连接 nn2 的异常,那就说明是第一次连接 nn1 抛出异常,然后试图连接 nn2,成功连接,没有抛出异常,接下来程序就正常处理数据了,对功能没有任何影响。</p><p>到这里,我们已经分析出了整个过程,现象表明这个异常只是连接了 standby 状态的 namenode,是正常抛出的。然后会再次连接另外一台 active 状态的 namenode,连接成功。</p><h2 id="抛异常的流程细节"><a href="#抛异常的流程细节" class="headerlink" 
title="抛异常的流程细节"></a>抛异常的流程细节</h2><p>1、客户端在连接 HDFS 的时候,会从配置文件 hdfs-site.xml 中,读取 nameservices 的配置,获取机器编号,我这里是 nn1 和 nn2,分别对应着 2 台 namenode 机器;</p><p>2、客户端会首先选择编号较小的 namenode(我这里是 nn1,对应着 hadoop1),试图连接;</p><p>3、如果这台 namenode 是 active 状态,则客户端可以正常处理请求;但是如果这台 namenode 是 standby 状态,则客户端抛出由服务端返回的异常:Operation category READ is not supported in state standby,同时打印 ip 信息,接着会尝试连接另外一台编号较大的 namenode(我这里是 nn2,即 rocket15);</p><p>4、如果连接成功,则客户端可以正常处理请求;如果 nn2 仍然像 nn1 一样,客户端会抛出一样的异常,此时会继续反复重试 nn1 与 nn2(重试次数有配置项,间隔时间有配置项);如果有成功的,则客户端可以正常处理请求,如果全部失败,则客户端无法正常处理请求,此时应该要关注解决 namenode 为什么全部都处在 standby 状态。</p><p>配置参数如下(参考 <a href="https://hadoop.apache.org/docs/r2.6.0/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml" target="_blank" rel="noopener">Hadoop 官方文档 </a>):</p><figure class="highlight xml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"><!-- 客户端重试次数,默认 15 --></span></span><br><span class="line"><span class="tag"><<span class="name">property</span>></span></span><br><span class="line"> <span class="tag"><<span 
class="name">name</span>></span>dfs.client.failover.max.attempts<span class="tag"></<span class="name">name</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">value</span>></span>15<span class="tag"></<span class="name">value</span>></span></span><br><span class="line"><span class="tag"></<span class="name">property</span>></span></span><br><span class="line"></span><br><span class="line"><span class="comment"><!-- 客户端 2 次重试间隔时间的基数(随重试次数指数增长),默认 500 毫秒 --></span></span><br><span class="line"><span class="tag"><<span class="name">property</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">name</span>></span>dfs.client.failover.sleep.base.millis<span class="tag"></<span class="name">name</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">value</span>></span>500<span class="tag"></<span class="name">value</span>></span></span><br><span class="line"><span class="tag"></<span class="name">property</span>></span></span><br><span class="line"></span><br><span class="line"><span class="comment"><!-- 客户端 2 次重试间隔时间的上限,默认 15000 毫秒 --></span></span><br><span class="line"><span class="tag"><<span class="name">property</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">name</span>></span>dfs.client.failover.sleep.max.millis<span class="tag"></<span class="name">name</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">value</span>></span>15000<span class="tag"></<span class="name">value</span>></span></span><br><span class="line"><span class="tag"></<span class="name">property</span>></span></span><br><span class="line"></span><br><span class="line"><span class="comment"><!-- 客户端 1 次连接中重试次数,默认 0, 在网络不稳定时建议加大此值 --></span></span><br><span class="line"><span class="tag"><<span class="name">property</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">name</span>></span>dfs.client.failover.connection.retries<span 
class="tag"></<span class="name">name</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">value</span>></span>0<span class="tag"></<span class="name">value</span>></span></span><br><span class="line"><span class="tag"></<span class="name">property</span>></span></span><br><span class="line"></span><br><span class="line"><span class="comment"><!-- 客户端 1 次连接中超时重试次数,仅是指超时重试,默认 0, 在网络不稳定时建议加大此值 --></span></span><br><span class="line"><span class="tag"><<span class="name">property</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">name</span>></span>dfs.client.failover.connection.retries.on.timeouts<span class="tag"></<span class="name">name</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">value</span>></span>0<span class="tag"></<span class="name">value</span>></span></span><br><span class="line"><span class="tag"></<span class="name">property</span>></span></span><br></pre></td></tr></table></figure><h1 id="问题解决"><a href="#问题解决" class="headerlink" title="问题解决"></a>问题解决</h1><p>既然明确了问题,并且分析出了具体原因,解决起来就简单了,对于我这种情况,有 2 种方法:</p><p>1、不用解决,也无需关心,这个异常没有任何影响,会自动重连另外一台 active 状态的 namenode 机器的;</p><p>2、如果就是一心想把异常消除掉,那就更改 hdfs-site.xml 配置文件里面的 nameservices 配置项对应的机器,把编号最小的机器设置成状态为 active 的 namenode(例如我这里把 nn1、nn2 的对应的机器 ip 地址交换一下即可,确保 nn1 是 active 状态的),那么连接 HDFS 的时候第一次就会直接连接这台机器,就不会抛出异常了(但是要注意 namenode 以后可能是会挂的,挂了会自动切换,那么到那个时候还要更改这个配置项)。</p><figure class="highlight xml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line"><span class="tag"><<span class="name">property</span>></span></span><br><span class="line"> <span class="tag"><<span 
class="name">name</span>></span>dfs.namenode.rpc-address.r-cluster.nn1<span class="tag"></<span class="name">name</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">value</span>></span>rocket15:8020<span class="tag"></<span class="name">value</span>></span></span><br><span class="line"><span class="tag"></<span class="name">property</span>></span></span><br><span class="line"></span><br><span class="line"><span class="tag"><<span class="name">property</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">name</span>></span>dfs.namenode.rpc-address.r-cluster.nn2<span class="tag"></<span class="name">name</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">value</span>></span>hadoop1:8020<span class="tag"></<span class="name">value</span>></span></span><br><span class="line"><span class="tag"></<span class="name">property</span>></span></span><br></pre></td></tr></table></figure><h1 id="问题总结"><a href="#问题总结" class="headerlink" title="问题总结"></a>问题总结</h1><p>1、参考:<a href="http://support-it.huawei.com/docs/zh-cn/fusioninsight-all/maintenance-guide/zh-cn_topic_0062904132.html" target="_blank" rel="noopener">http://support-it.huawei.com/docs/zh-cn/fusioninsight-all/maintenance-guide/zh-cn_topic_0062904132.html</a></p><p>2、这个问题其实不是问题,只不过抛出了异常,我看到有点担心而已,但是如果连接所有的机器都抛出这种异常,并且重试了很多次就有影响了,说明所有的 namenode 都挂了,根本无法正常操作 HDFS 系统;</p><p>3、根据 2 进行总结:如果只是在操作 HDFS 的时候打印一次(每次操作都会打印一次),说明第一次连接到了 standby 状态的 namenode,是正常的,不用关心;但是,如果出现了大量的异常(比如连续 10 次,连续 20 次),说明 namenode 出问题了,此时应该关心 namenode 的状态,确保正常服务。</p></div><div><div id="wechat_subscriber" style="display:block;padding:10px 0;margin:20px auto;width:100%;text-align:center"><img id="wechat_subscriber_qcode" src="/images/wechat-qr-personal.jpg" alt="虾丸派 wechat" style="width:200px;max-width:100%"><div>扫一扫添加博主,进技术交流群,共同学习进步</div></div></div><div><div style="padding:10px 0;margin:20px auto;width:90%;text-align:center"><div>永不止步</div><button 
id="rewardButton" disable="enable" onclick='var qr=document.getElementById("QR");"none"===qr.style.display?qr.style.display="block":qr.style.display="none"'><span>打赏</span></button><div id="QR" style="display:none"><div id="wechat" style="display:inline-block"><img id="wechat_qr" src="/images/wechat-pay-playpi.png" alt="虾丸派 微信支付"><p>微信支付</p></div></div></div></div><div><ul class="post-copyright"><li class="post-copyright-author"><strong>本文作者:</strong> 虾丸派</li><li class="post-copyright-link"><strong>本文链接:</strong> <a href="https://www.playpi.org/2018122702.html" title="HDFS 异常之 READ is not supported in state standby">https://www.playpi.org/2018122702.html</a></li><li class="post-copyright-license"><strong>版权声明: </strong>本博客所有文章除特别声明外,均采用 <a href="https://creativecommons.org/licenses/by-nc-sa/3.0/" rel="external nofollow" target="_blank">CC BY-NC-SA 3.0</a> 许可协议。转载请注明出处!</li></ul></div><footer class="post-footer"><div class="post-tags"><a href="/tags/Hadoop/" rel="tag"><i class="fa fa-tag"></i> Hadoop</a> <a href="/tags/Spark/" rel="tag"><i class="fa fa-tag"></i> Spark</a> <a href="/tags/HDFS/" rel="tag"><i class="fa fa-tag"></i> HDFS</a> <a href="/tags/nameNode/" rel="tag"><i class="fa fa-tag"></i> nameNode</a> <a href="/tags/standby/" rel="tag"><i class="fa fa-tag"></i> standby</a></div><div class="post-nav"><div class="post-nav-next post-nav-item"><a href="/2018122701.html" rel="next" title="HDFS 异常之 Filesystem closed"><i class="fa fa-chevron-left"></i> HDFS 异常之 Filesystem closed</a></div><span class="post-nav-divider"></span><div class="post-nav-prev post-nav-item"><a href="/2018122801.html" rel="prev" title="Spark 异常之 java.net.BindException: 地址已在使用">Spark 异常之 java.net.BindException: 地址已在使用 <i class="fa fa-chevron-right"></i></a></div></div></footer></div></article><div class="post-spread"></div></div></div><div class="comments" id="comments"><div id="vcomments"></div></div></div><div class="sidebar-toggle"><div class="sidebar-toggle-line-wrap"><span 
class="sidebar-toggle-line sidebar-toggle-line-first"></span> <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span> <span class="sidebar-toggle-line sidebar-toggle-line-last"></span></div></div><aside id="sidebar" class="sidebar"><div class="sidebar-inner"><ul class="sidebar-nav motion-element"><li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap">文章目录</li><li class="sidebar-nav-overview" data-target="site-overview-wrap">站点概览</li></ul><section class="site-overview-wrap sidebar-panel"><div class="site-overview"><div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person"><img class="site-author-image" itemprop="image" src="/images/favicon-1536x1536-playpi.png" alt="虾丸派"><p class="site-author-name" itemprop="name">虾丸派</p><p class="site-description motion-element" itemprop="description">记录知识 | 分享技术</p></div><nav class="site-state motion-element"><div class="site-state-item site-state-posts"><a href="/archives/"><span class="site-state-item-count">144</span> <span class="site-state-item-name">日志</span></a></div><div class="site-state-item site-state-categories"><a href="/categories/index.html"><span class="site-state-item-count">13</span> <span class="site-state-item-name">分类</span></a></div><div class="site-state-item site-state-tags"><a href="/tags/index.html"><span class="site-state-item-count">294</span> <span class="site-state-item-name">标签</span></a></div></nav><div class="feed-link motion-element"><a href="/atom.xml" rel="alternate"><i class="fa fa-rss"></i> RSS</a></div><div class="links-of-author motion-element"><span class="links-of-author-item"><a href="https://github.com/iplaypi" target="_blank" title="GitHub"><i class="fa fa-fw fa-github"></i>GitHub</a> </span><span class="links-of-author-item"><a href="https://weibo.com/u/3086148515" target="_blank" title="微博"><i class="fa fa-fw fa-weibo"></i>微博</a> </span><span class="links-of-author-item"><a href="mailto:playpi@qq.com" 
target="_blank" title="E-Mail"><i class="fa fa-fw fa-envelope"></i>E-Mail</a></span></div><div class="cc-license motion-element" itemprop="license"><a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" class="cc-opacity" target="_blank" rel="external nofollow"><img src="/images/cc-by-nc-sa.svg" alt="Creative Commons"></a></div><div class="links-of-blogroll motion-element links-of-blogroll-inline"><div class="links-of-blogroll-title"><i class="fa fa-fw fa-link"></i> 友情链接</div><ul class="links-of-blogroll-list"><li class="links-of-blogroll-item"><a href="https://github.com/iplaypi" title="GitHub" target="_blank" rel="external nofollow">GitHub</a></li><li class="links-of-blogroll-item"><a href="https://weibo.com/u/3086148515" title="Weibo" target="_blank" rel="external nofollow">Weibo</a></li><li class="links-of-blogroll-item"><a href="https://www.playpi.org" title="虾丸派" target="_blank" rel="external nofollow">虾丸派</a></li><li class="links-of-blogroll-item"><a href="https://www.playpi.org" title="playpi" target="_blank" rel="external nofollow">playpi</a></li><li class="links-of-blogroll-item"><a href="https://www.liaoxuefeng.com" title="廖雪峰" target="_blank" rel="external nofollow">廖雪峰</a></li><li class="links-of-blogroll-item"><a href="http://www.ruanyifeng.com" title="阮一峰" target="_blank" rel="external nofollow">阮一峰</a></li><li class="links-of-blogroll-item"><a href="https://travis-ci.org/iplaypi/iplaypi.github.io" title="travis-ci" target="_blank" rel="external nofollow">travis-ci</a></li><li class="links-of-blogroll-item"><a href="https://www.vultr.com/?ref=7861302-4F" title="Vultr" target="_blank" rel="external nofollow">Vultr</a></li></ul></div></div></section><section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active"><div class="post-toc"><div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-1"><a class="nav-link" href="#问题出现"><span class="nav-number">1.</span> <span class="nav-text">问题出现</span></a><ol 
class="nav-child"><li class="nav-item nav-level-2"><a class="nav-link" href="#问题分析"><span class="nav-number">1.1.</span> <span class="nav-text">问题分析</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#抛异常的流程细节"><span class="nav-number">1.2.</span> <span class="nav-text">抛异常的流程细节</span></a></li></ol></li><li class="nav-item nav-level-1"><a class="nav-link" href="#问题解决"><span class="nav-number">2.</span> <span class="nav-text">问题解决</span></a></li><li class="nav-item nav-level-1"><a class="nav-link" href="#问题总结"><span class="nav-number">3.</span> <span class="nav-text">问题总结</span></a></li></ol></div></div></section></div></aside></div></main><!-- Page footer: copyright line, generator and theme credits, and the Busuanzi site-wide visit counters. --><footer id="footer" class="footer"><div class="footer-inner"><div class="copyright">© 2016–<span itemprop="copyrightYear">2021</span> <span class="post-meta-divider">|</span> <span class="with-love"><i class="fa fa-heart"></i> </span><span class="author" itemprop="copyrightHolder">虾丸派</span> <span class="post-meta-divider">|</span> <span class="post-meta-item-icon"><i class="fa fa-area-chart"></i> </span><span class="post-meta-item-text">全站字数统计</span> <span title="全站字数统计">326.3k 字</span></div><div class="powered-by">由 <a class="theme-link" target="_blank" href="https://hexo.io" rel="external nofollow noopener">Hexo</a> 强力驱动</div><span class="post-meta-divider">|</span><div class="theme-info">主题 <a class="theme-link" target="_blank" href="https://github.com/iissnan/hexo-theme-next" rel="external nofollow noopener">NexT.Mist</a><!-- Busuanzi counter script, loaded once over HTTPS. The two busuanzi_container_* spans below start hidden (display:none); presumably the script reveals them after filling the busuanzi_value_* spans (confirm against the Busuanzi docs). --><script async src="https://busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script><span id="busuanzi_container_site_pv" style="display:none"><span class="post-meta-divider">|</span> 总访问量 <span id="busuanzi_value_site_pv"></span> 次 </span><span id="busuanzi_container_site_uv" style="display:none"><span class="post-meta-divider">|</span> 总访客 <span id="busuanzi_value_site_uv"></span> 人</span></div><div class="busuanzi-count"><!-- Intentionally empty: the counter script is already included once in .theme-info, so no second copy is loaded here. The wrapper is kept for any theme CSS that targets it. --></div></div></footer><div class="back-to-top"><i class="fa fa-arrow-up"></i> <span id="scrollpercent"><span>0</span>%</span></div></div><script type="text/javascript">"[object Function]"!==Object.prototype.toString.call(window.Promise)&&(window.Promise=null)</script><!-- Vendor libraries followed by the NexT theme scripts; jQuery is loaded first. --><script type="text/javascript" src="/lib/jquery/index.js?v=2.1.3"></script><script type="text/javascript" src="/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script><script type="text/javascript" src="/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script><script type="text/javascript" src="/lib/velocity/velocity.min.js?v=1.2.1"></script><script type="text/javascript" src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script><script type="text/javascript" src="/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script><script type="text/javascript" src="/js/src/utils.js?v=5.1.3"></script><script type="text/javascript" src="/js/src/motion.js?v=5.1.3"></script><script type="text/javascript" src="/js/src/scrollspy.js?v=5.1.3"></script><script type="text/javascript" src="/js/src/post-details.js?v=5.1.3"></script><script type="text/javascript" src="/js/src/bootstrap.js?v=5.1.3"></script><!-- Valine comment widget, mounted on #comments. NOTE(review): the init call passes the global AV, which none of the scripts in this part of the page define; confirm the LeanCloud SDK is loaded before this point. --><script src="https://unpkg.com/valine@1.3.7/dist/Valine.min.js"></script><script type="text/javascript">new Valine({av:AV,el:"#comments",verify:!1,notify:!1,app_id:"FC5Jijeg1meo2K2OzPYWK327-gzGzoHsz",app_key:"6A1ReY8tjhPutK00F01YbJSq",placeholder:"没有问题吗?"})</script><script type="text/javascript">/* Local search state: isfetched becomes true once the index has been loaded; isXml is cleared when search_path ends in "json"; onPopupClose hides the popup and removes the input text, result list, no-result marker and overlay. */var isfetched=!1,isXml=!0,search_path="search.xml";0===search_path.length?search_path="search.xml":/json$/i.test(search_path)&&(isXml=!1);var path="/"+search_path,onPopupClose=function(t){$(".popup").hide(),$("#local-search-input").val(""),$(".search-result-list").remove(),$("#no-result").remove(),$(".local-search-pop-overlay").remove(),$("body").css("overflow","")};/* proceedsearch(): add the page overlay, toggle the popup and focus the search input. */function proceedsearch(){$("body").append('<div class="search-popup-overlay 
local-search-pop-overlay"></div>').css("overflow","hidden"),$(".search-popup-overlay").click(onPopupClose),$(".popup").toggle();var t=$("#local-search-input");t.attr("autocapitalize","none"),t.attr("autocorrect","off"),t.focus()}/* searchFunc(t,e,s): show a loading overlay, fetch the search index from URL t (parsed as XML or JSON depending on isXml), then filter it live from the input with id e into the container with id s. */var searchFunc=function(t,e,s){"use strict";$("body").append('<div class="search-popup-overlay local-search-pop-overlay"><div id="search-loading-icon"><i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i></div></div>').css("overflow","hidden"),$("#search-loading-icon").css("margin","20% auto 0 auto").css("text-align","center"),$.ajax({url:t,dataType:isXml?"xml":"json",async:!0,success:function(t){isfetched=!0,$(".popup").detach().appendTo(".header-inner");/* Normalise the index: XML entry elements become {title,content,url} records; a JSON response is used as-is. */var o=isXml?$("entry",t).map(function(){return{title:$("title",this).text(),content:$("content",this).text(),url:$("url",this).text()}}).get():t,n=document.getElementById(e),r=document.getElementById(s),t=function(){/* Input handler: lower-case the query, split it on whitespace and hyphens, and also keep the whole phrase as a keyword when it contains several words. */var m=n.value.trim().toLowerCase(),x=m.split(/[\s\-]+/);1<x.length&&x.push(m);var e,w=[];0<m.length&&o.forEach(function(t){var e=!1,o=0,h=0,n=t.title.trim(),r=n.toLowerCase(),s=t.content.trim().replace(/<[^>]+>/g,""),a=s.toLowerCase(),i=decodeURIComponent(t.url),c=[],l=[];if(""!=n&&(x.forEach(function(t){function e(t,e,o){var n=t.length;if(0===n)return[];var r,s=0,a=[];for(o||(e=e.toLowerCase(),t=t.toLowerCase());-1<(r=e.indexOf(t,s));)a.push({position:r,word:t}),s=r+n;return a}c=c.concat(e(t,r,!1)),l=l.concat(e(t,a,!1))}),(0<c.length||0<l.length)&&(e=!0,o=c.length+l.length)),e){/* p(t,e,o,n): pop matches off the end of n (sorted below so the earliest position is last) while they end at or before offset o, skipping matches that overlap the one just taken; returns the highlight ranges for the slice from e to o and adds the whole-query hit count to h. */function p(t,e,o,n){for(var r=n[n.length-1],s=r.position,a=r.word,i=[],c=0;s+a.length<=o&&0!=n.length;){a===m&&c++,i.push({position:s,length:a.length});var l=s+a.length;for(n.pop();0!=n.length&&(s=(r=n[n.length-1]).position,a=r.word,s<l);)n.pop()}return h+=c,{hits:i,start:e,end:o,searchTextCount:c}}[c,l].forEach(function(t){t.sort(function(t,e){return e.position!==t.position?e.position-t.position:t.word.length-e.word.length})});t=[];0!=c.length&&t.push(p(0,0,n.length,c));for(var u=[];0!=l.length;){var f=l[l.length-1],d=f.position,g=f.word,v=d-20,f=d+80;v<0&&(v=0),(f=f<d+g.length?d+g.length:f)>s.length&&(f=s.length),u.push(p(0,v,f,l))}u.sort(function(t,e){return t.searchTextCount!==e.searchTextCount?e.searchTextCount-t.searchTextCount:t.hits.length!==e.hits.length?e.hits.length-t.hits.length:t.start-e.start});/* e is reused here as the maximum number of content excerpts kept per post. */e=parseInt("1");/* Local $ (shadows jQuery inside this block): returns the slice t of text o with every hit wrapped in a b.search-keyword element. */function $(o,t){var n="",r=t.start;return t.hits.forEach(function(t){n+=o.substring(r,t.position);var e=t.position+t.length;n+='<b class="search-keyword">'+o.substring(t.position,e)+"</b>",r=e}),n+=o.substring(r,t.end)}0<=e&&(u=u.slice(0,e));var C="";0!=t.length?C+="<li><a href='"+i+"' class='search-result-title'>"+$(n,t[0])+"</a>":C+="<li><a href='"+i+"' class='search-result-title'>"+n+"</a>",u.forEach(function(t){C+="<a href='"+i+'\'><p class="search-result">'+$(s,t)+"...</p></a>"}),C+="</li>",w.push({item:C,searchTextCount:h,hitCount:o,id:w.length})}}),1===x.length&&""===x[0]?r.innerHTML='<div id="no-result"><i class="fa fa-search fa-5x" /></div>':0===w.length?r.innerHTML='<div id="no-result"><i class="fa fa-frown-o fa-5x" /></div>':(w.sort(function(t,e){return t.searchTextCount!==e.searchTextCount?e.searchTextCount-t.searchTextCount:t.hitCount!==e.hitCount?e.hitCount-t.hitCount:e.id-t.id}),e='<ul class="search-result-list">',w.forEach(function(t){e+=t.item}),e+="</ul>",r.innerHTML=e)};n.addEventListener("input",t),$(".local-search-pop-overlay").remove(),$("body").css("overflow",""),proceedsearch()}})};/* UI wiring: the trigger fetches the index on first use and only reopens the popup afterwards; the close button and the Esc key (while the popup is visible) close it; clicks inside the popup do not propagate. */$(".popup-trigger").click(function(t){t.stopPropagation(),!1===isfetched?searchFunc(path,"local-search-input","local-search-result"):proceedsearch()}),$(".popup-btn-close").click(onPopupClose),$(".popup").click(function(t){t.stopPropagation()}),$(document).on("keyup",function(t){27===t.which&&$(".search-popup").is(":visible")&&onPopupClose()})</script><script>!function(){var 
t=document.createElement("script"),e=window.location.protocol.split(":")[0];/* Pick the push script by page protocol: the https URL for https pages, the http URL otherwise. */t.src="https"===e?"https://zz.bdstatic.com/linksubmit/push.js":"http://push.zhanzhang.baidu.com/push.js";/* e is reused: insert the new script element before the first script element in the document. */e=document.getElementsByTagName("script")[0];e.parentNode.insertBefore(t,e)}()</script><!-- Theme cookie scripts: js.cookie.js and scroll-cookie.js (presumably used to remember the scroll position; confirm in the theme source). --><script type="text/javascript" src="/js/src/js.cookie.js?v=5.1.3"></script><script type="text/javascript" src="/js/src/scroll-cookie.js?v=5.1.3"></script><!-- Live2D widget: initialised with the hijiki model, positioned left at 100x200 with a vertical offset of -20, and with mobile.show set to false. --><script src="/live2dw/lib/L2Dwidget.min.js?094cbace49a39548bed64abff5988b05"></script><script>L2Dwidget.init({pluginRootPath:"live2dw/",pluginJsPath:"lib/",pluginModelPath:"assets/",tagMode:!1,debug:!1,model:{scale:1,jsonPath:"/live2dw/assets/hijiki.model.json"},display:{position:"left",width:100,height:200,hOffset:0,vOffset:-20},mobile:{show:!1,motion:!0,scale:.3},log:!1})</script></body></html>