- #!/usr/bin/env python
- # -*- encoding:utf-8 -*-
- """A library to access Hadoop HTTP REST API,
- Make sure your Hadoop cluster has HTTP access enabled.
- """
- '''
- author : liran
- date : 2013-03-11
-
- 致谢:xwu
- 武汉云雅科技有限公司
-
- '''
import json
import logging
import os
import re
import StringIO
import sys
import tempfile

import pycurl
-
class WebHadoop(object):
    """Minimal WebHDFS client built on pycurl.

    Every public method issues one or more HTTP requests against
    ``http://<host>:<port><prefix>`` and stores the HTTP status code of the
    most recent request in ``self.status`` (``None`` when the request could
    not be completed).  Apart from ``checklink`` (which exits the process on
    failure) and ``get_file`` (which lets local file errors propagate), the
    public methods report failure through the logger and a falsy return
    value rather than by raising.

    NOTE(review): ``username`` is stored as ``self.user`` but is not added to
    any request; WebHDFS accepts a ``user.name`` query parameter -- confirm
    whether callers expect it to be sent.
    """

    def __init__(self, host, port, username, logger, prefix="/webhdfs/v1"):
        """Remember the connection settings and derive the base URLs.

        :param host: namenode host name or address.
        :param port: namenode HTTP port.
        :param username: stored as ``self.user`` (currently unused).
        :param logger: object with ``info``/``error``/``warning``/``debug``
            methods, e.g. a ``logging.Logger``.
        :param prefix: URL path prefix of the WebHDFS endpoint.
        """
        self.host = host
        self.port = port
        self.user = username
        self.logger = logger
        self.prefix = prefix
        self.status = None
        self.url = "http://%s:%s" % (host, port)
        self.url_path = self.url + self.prefix

    # ------------------------------------------------------------------
    # internal helpers
    # ------------------------------------------------------------------
    def _request(self, url, method="GET", sink=None, follow=True,
                 upload=None, timeouts=None):
        """Perform one HTTP request and record its status in ``self.status``.

        :param url: full request URL.
        :param method: HTTP verb; anything but ``"GET"`` is sent as a
            custom request.
        :param sink: optional callable receiving response body chunks; when
            omitted the body is buffered and returned.
        :param follow: whether libcurl should follow redirects (max 5).
        :param upload: optional ``(fileobj, size)`` pair streamed as the
            request body.
        :param timeouts: optional ``(connect_seconds, total_seconds)`` pair.
        :returns: ``(body, redirect_url)``; ``body`` is ``None`` when a
            ``sink`` was supplied, ``redirect_url`` is only looked up when
            ``follow`` is false.
        :raises pycurl.error: when the transfer fails.
        """
        self.status = None
        buf = StringIO.StringIO() if sink is None else None
        curl = pycurl.Curl()
        try:
            curl.setopt(pycurl.URL, url)
            # A header with no value makes libcurl omit its default
            # "Accept" header.
            curl.setopt(pycurl.HTTPHEADER, ["Accept:"])
            curl.setopt(pycurl.WRITEFUNCTION,
                        buf.write if sink is None else sink)
            curl.setopt(pycurl.FOLLOWLOCATION, 1 if follow else 0)
            curl.setopt(pycurl.MAXREDIRS, 5)
            if upload is not None:
                fileobj, size = upload
                curl.setopt(pycurl.UPLOAD, 1)
                curl.setopt(pycurl.READFUNCTION, fileobj.read)
                curl.setopt(pycurl.INFILESIZE, size)
            if method != "GET":
                curl.setopt(pycurl.CUSTOMREQUEST, method)
            if timeouts is not None:
                connect_timeout, total_timeout = timeouts
                curl.setopt(pycurl.CONNECTTIMEOUT, connect_timeout)
                curl.setopt(pycurl.TIMEOUT, total_timeout)
            curl.perform()
            self.status = curl.getinfo(pycurl.HTTP_CODE)
            redirect = None
            if not follow:
                redirect = curl.getinfo(pycurl.REDIRECT_URL)
            body = buf.getvalue() if buf is not None else None
            return body, redirect
        finally:
            curl.close()
            if buf is not None:
                buf.close()

    def _safe_request(self, url, **options):
        """Like ``_request`` but log any failure and return ``(None, None)``.

        The public methods are best-effort: they signal failure through
        their return value, so errors are logged here instead of raised.
        ``self.status`` is left as ``None`` when the request fails.
        """
        try:
            return self._request(url, **options)
        except Exception as e:
            self.logger.error("Request to \"%s\" failed: %s" % (url, e))
            return None, None

    @staticmethod
    def _parse_json(body):
        """Return the decoded JSON document, or ``None`` if it is invalid."""
        try:
            return json.loads(body)
        except (TypeError, ValueError):
            return None

    @classmethod
    def _is_true(cls, body):
        """Tell whether ``body`` is a JSON object whose ``boolean`` is true."""
        parsed = cls._parse_json(body)
        return isinstance(parsed, dict) and parsed.get("boolean") is True

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def checklink(self):
        """Check that the cluster is reachable and has live datanodes.

        Fetches ``/dfsnodelist.jsp?whatNodes=LIVE`` and extracts the text
        following ``Live Datanodes :``.

        :returns: the live datanode count as a string, or ``None`` when the
            count cannot be found in the response.
        :raises SystemExit: with code 255 when the request fails or the
            reported count is ``"0"``.
        """
        checkurl = self.url + "/dfsnodelist.jsp?whatNodes=LIVE"
        try:
            body, _ = self._request(checkurl)
        except pycurl.error as e:
            self.logger.error(
                "Sorry, can not get the hadoop http link .Erros: %s" % e)
            sys.exit(255)

        self.Write_Debug_Log(self.status, checkurl)
        match = re.search(r'Live Datanodes :(.*?)</a', body)
        if match is None:
            self.logger.error(
                "Sorry, can not find the live datanodes count in \"%s\""
                % checkurl)
            return None

        # Strip so that a value such as " 0" still compares equal to "0".
        live = match.group(1).strip()
        if live == "0":
            self.logger.error(
                "Sorry, There are not live datanodes in Hadoop Cluster!!!")
            sys.exit(255)
        return live

    def lsdir(self, path):
        """List a directory (``op=LISTSTATUS``).

        :param path: HDFS path, appended verbatim to the endpoint URL.
        :returns: the ``FileStatuses.FileStatus`` value of the response, or
            ``False`` on any failure.
        """
        lsdir_url = self.url_path + path + "?op=LISTSTATUS"
        body, _ = self._safe_request(lsdir_url)
        if self.status == 200:
            listing = self._parse_json(body)
            try:
                return listing['FileStatuses']['FileStatus']
            except (KeyError, TypeError):
                # Fall through to the common failure report below.
                pass
        self.logger.error("Sorry,can not list the dir or file status!!!")
        self.Write_Debug_Log(self.status, lsdir_url)
        return False

    def lsfile(self, path):
        """Fetch the status of a single file (``op=GETFILESTATUS``).

        :param path: HDFS path, appended verbatim to the endpoint URL.
        :returns: the ``FileStatus`` value of the response, or ``False``
            when the request fails or the path is a directory.
        """
        lsfile_url = self.url_path + path + "?op=GETFILESTATUS"
        body, _ = self._safe_request(lsfile_url)
        info = None
        if self.status == 200:
            parsed = self._parse_json(body)
            try:
                info = parsed['FileStatus']
                is_dir = info['type'] == "DIRECTORY"
            except (KeyError, TypeError):
                info = None
        if info is None:
            self.logger.error("Sorry,can not list the dir or file status!!!")
            self.Write_Debug_Log(self.status, lsfile_url)
            return False
        if is_dir:
            self.logger.error(
                "Sorry,this file %s is a dir actually!!!" % (path))
            return False
        return info

    def mkdir(self, path, permission="755"):
        """Create a directory (``op=MKDIRS``).

        :param path: HDFS path of the directory to create.
        :param permission: octal permission string sent in the query.
        :returns: ``True`` when the server answers 200 with a true boolean,
            ``False`` otherwise.
        """
        mkdir_url = "%s%s?op=MKDIRS&permission=%s" % (
            self.url_path, path, permission)
        body, _ = self._safe_request(mkdir_url, method="PUT")
        if self.status != 200:
            self.logger.error(
                "Sorry,can't create this %s dir in hadoop cluster!!1" % (path))
            self.Write_Debug_Log(self.status, mkdir_url)
            return False
        if self._is_true(body):
            self.logger.info(
                "Great,Successfully Create dir %s in hadoop cluster!!"
                % (path))
            return True
        self.logger.info(
            "Sorry,can't create this %s dir in hadoop cluster!!1!!" % (path))
        return False

    def remove(self, path, recursive="True"):
        """Delete a file or directory (``op=DELETE``).

        :param path: HDFS path to delete.
        :param recursive: value sent as the ``recursive`` query parameter.
        :returns: ``True`` when the server answers 200 with a true boolean,
            ``False`` otherwise.
        """
        remove_url = "%s%s?op=DELETE&recursive=%s" % (
            self.url_path, path, recursive)
        body, _ = self._safe_request(remove_url, method="DELETE")
        if self.status != 200:
            self.logger.error(
                "Sorry,can't delete %s in hadoop cluster!!" % (path))
            self.Write_Debug_Log(self.status, remove_url)
            return False
        if self._is_true(body):
            self.logger.info(
                "Great,Successfully delete dir or file %s in hadoop cluster!!"
                % (path))
            return True
        self.logger.info(
            "Sorry,can't delete dir or file,maybe this dir is not exsited!!")
        return False

    def rename(self, src, dst):
        """Rename/move a path (``op=RENAME``).

        :param src: existing HDFS path.
        :param dst: value sent as the ``destination`` query parameter.
        :returns: ``True`` when the server answers 200 with a true boolean,
            ``False`` otherwise.
        """
        rename_url = "%s%s?op=RENAME&destination=%s" % (
            self.url_path, src, dst)
        body, _ = self._safe_request(rename_url, method="PUT")
        if self.status != 200:
            self.logger.error(
                "Sorry,can't rename %s in hadoop cluster!!" % (rename_url))
            self.Write_Debug_Log(self.status, rename_url)
            return False
        if self._is_true(body):
            self.logger.info(
                "Great,Successfully rename dir or file %s in hadoop cluster!!"
                % (rename_url))
            return True
        self.logger.info(
            "Sorry,can't rename dir or file,maybe this dir is not exsited!!")
        return False

    def put_file(self, local_path, hdfs_path, overwrite="true",
                 permission="755", buffersize="128"):
        """Upload a local file to HDFS (``op=CREATE``).

        The upload is done in two requests: a bodiless ``PUT`` to the
        namenode that is *not* followed, to learn the redirect target, then
        a ``PUT`` streaming the file to that target.  Sending a body with
        the first request while following the redirect would create the
        target file before the real upload starts.

        :param local_path: path of an existing regular local file.
        :param hdfs_path: destination HDFS path.
        :param overwrite: value of the ``overwrite`` query parameter.
        :param permission: value of the ``permission`` query parameter.
        :param buffersize: value of the ``buffersize`` query parameter.
        :returns: ``True`` when the final request answers 201, else
            ``False``.
        """
        self.status = None
        if not os.path.isfile(local_path):
            self.logger.error(
                "Sorry,the %s is not existed,maybe it is not a file."
                % local_path)
            return False

        put_url = "%s%s?op=CREATE&overwrite=%s&permission=%s&buffersize=%s" % (
            self.url_path, hdfs_path, overwrite, permission, buffersize)
        _, data_url = self._safe_request(put_url, method="PUT", follow=False)
        if not data_url:
            self.logger.error(
                "Sorry,can not get the upload location for %s" % hdfs_path)
            self.Write_Debug_Log(self.status, put_url)
            return False

        try:
            with open(local_path, 'rb') as src:
                self._safe_request(
                    data_url, method="PUT",
                    upload=(src, os.path.getsize(local_path)))
        except (IOError, OSError) as e:
            # The status still holds the redirect code of the first request.
            self.status = None
            self.logger.error("Can not read %s: %s" % (local_path, e))

        if self.status != 201:
            self.Write_Debug_Log(self.status, data_url)
            return False
        self.logger.info(
            "Great,successfully put file into hdfs %s " % hdfs_path)
        return True

    def append(self, local_path, hdfs_path, buffersize=None):
        """Placeholder for appending to an HDFS file; does nothing."""
        pass

    def get_file(self, local_path, hdfs_path, buffersize="128"):
        """Download an HDFS file to a local path (``op=OPEN``).

        The local file is created or truncated before the request is made
        and receives whatever body the server sends, including an error
        body when the request does not succeed.

        :param local_path: local destination path.
        :param hdfs_path: HDFS path to read.
        :param buffersize: value of the ``buffersize`` query parameter.
        :returns: ``True`` when the server answers 200, else ``False``.
        :raises IOError: when ``local_path`` cannot be opened for writing.
        """
        get_url = "%s%s?op=OPEN&buffersize=%s" % (
            self.url_path, hdfs_path, buffersize)
        self.status = None
        with open(local_path, 'wb') as out:
            self._safe_request(get_url, sink=out.write, timeouts=(60, 300))

        if self.status != 200:
            self.Write_Debug_Log(self.status, get_url)
            return False
        self.logger.info(
            "Great,successfully get file %s from hdfs " % hdfs_path)
        return True

    def cat_file(self, hdfs_path, buffersize="128"):
        """Print the content of an HDFS file to stdout (``op=OPEN``).

        :param hdfs_path: HDFS path to read.
        :param buffersize: value of the ``buffersize`` query parameter.
        :returns: ``True`` when the server answers 200 (content printed),
            else ``False`` (nothing printed).
        """
        cat_url = "%s%s?op=OPEN&buffersize=%s" % (
            self.url_path, hdfs_path, buffersize)
        body, _ = self._safe_request(cat_url)
        if self.status != 200:
            self.Write_Debug_Log(self.status, cat_url)
            return False
        print(body)
        self.logger.info(
            "Great,successfully read file %s from hdfs " % hdfs_path)
        return True

    def copy_in_hdfs(self, src, dst, overwrite="true", permission="755",
                     buffersize="128"):
        """Copy ``src`` to ``dst`` inside HDFS via a local temporary file.

        The file is downloaded with ``get_file`` and uploaded again with
        ``put_file``; the temporary file is removed afterwards.

        :returns: ``True`` when both steps succeed, else ``False``.
        """
        # A private temporary file avoids clashes between concurrent copies.
        fd, tmpfile = tempfile.mkstemp(prefix="copy_inhdfs_")
        os.close(fd)
        try:
            if not self.get_file(tmpfile, src, buffersize=buffersize):
                return False
            return self.put_file(tmpfile, dst, overwrite=overwrite,
                                 permission=permission, buffersize=buffersize)
        finally:
            try:
                os.remove(tmpfile)
            except OSError as e:
                # Cleanup is best-effort; the copy result is already decided.
                self.logger.warning(
                    "Can not remove temp file %s: %s" % (tmpfile, e))

    def Write_Debug_Log(self, status, url):
        """Log ``url`` and ``status`` as errors unless status is 200 or 201.

        :param status: HTTP status code (or ``None``) to report.
        :param url: URL the status belongs to.
        """
        if status not in (200, 201):
            self.logger.error(
                "Url : \"%s\" ,Exit code : %s" % (url, status))
            self.logger.error("fetch a error ,but don't quit")
-
-
-
-
采用curl的方式实现的功能和java自带的命令行工具比较,还是有些不足的
1,不支持hadoop内部文件copy
2,不支持目录上传或者下载
3,测试的时候, shell的方式上传,如果文件已经存在会报错;curl的方式上传默认参数必须是overwrite=true,才能成功,不知道为什么。
唯一的好处就是,执行速度大大提高了。
同样一个列出目录列表的命令,
#time hadoop fs -ls hdfs://192.168.0.112:50081/
real 0m10.916s
user 0m4.082s
sys 0m6.799s
我有一个用户工厂。我希望默认情况下确认用户。但是鉴于unconfirmed特征,我不希望它们被确认。虽然我有一个基于实现细节而不是抽象的工作实现,但我想知道如何正确地做到这一点。factory:userdoafter(:create)do|user,evaluator|#unwantedimplementationdetailshereunlessFactoryGirl.factories[:user].defined_traits.map(&:name).include?(:unconfirmed)user.confirm!endendtrait:unconfirmeddoenden
只是想确保我理解了事情。据我目前收集到的信息,Cucumber只是一个“包装器”,或者是一种通过将事物分类为功能和步骤来组织测试的好方法,其中实际的单元测试处于步骤阶段。它允许您根据事物的工作方式组织您的测试。对吗? 最佳答案 有点。它是一种组织测试的方式,但不仅如此。它的行为就像最初的Rails集成测试一样,但更易于使用。这里最大的好处是您的session在整个Scenario中保持透明。关于Cucumber的另一件事是您(应该)从使用您的代码的浏览器或客户端的角度进行测试。如果您愿意,您可以使用步骤来构建对象和设置状态,但通常您
华为OD机试题本篇题目:明明的随机数题目输入描述输出描述:示例1输入输出说明代码编写思路最近更新的博客华为od2023|什么是华为od,od薪资待遇,od机试题清单华为OD机试真题大全,用Python解华为机试题|机试宝典【华为OD机试】全流程解析+经验分享,题型分享,防作弊指南华为o
C#实现简易绘图工具一.引言实验目的:通过制作窗体应用程序(C#画图软件),熟悉基本的窗体设计过程以及控件设计,事件处理等,熟悉使用C#的winform窗体进行绘图的基本步骤,对于面向对象编程有更加深刻的体会.Tutorial任务设计一个具有基本功能的画图软件**·包括简单的新建文件,保存,重新绘图等功能**·实现一些基本图形的绘制,包括铅笔和基本形状等,学习橡皮工具的创建**·设计一个合理舒适的UI界面**注明:你可能需要先了解一些关于winform窗体应用程序绘图的基本知识,以及关于GDI+类和结构的知识二.实验环境Windows系统下的visualstudio2017C#窗体应用程序三.
MIMO技术的优缺点优点通过下面三个增益来总体概括:阵列增益。阵列增益是指由于接收机通过对接收信号的相干合并而活得的平均SNR的提高。在发射机不知道信道信息的情况下,MIMO系统可以获得的阵列增益与接收天线数成正比复用增益。在采用空间复用方案的MIMO系统中,可以获得复用增益,即信道容量成倍增加。信道容量的增加与min(Nt,Nr)成正比分集增益。在采用空间分集方案的MIMO系统中,可以获得分集增益,即可靠性性能的改善。分集增益用独立衰落支路数来描述,即分集指数。在使用了空时编码的MIMO系统中,由于接收天线或发射天线之间的间距较远,可认为它们各自的大尺度衰落是相互独立的,因此分布式MIMO
遍历文件夹我们通常是使用递归进行操作,这种方式比较简单,也比较容易理解。本文为大家介绍另一种不使用递归的方式,由于没有使用递归,只用到了循环和集合,所以效率更高一些!一、使用递归遍历文件夹整体思路1、使用File封装初始目录,2、打印这个目录3、获取这个目录下所有的子文件和子目录的数组。4、遍历这个数组,取出每个File对象4-1、如果File是否是一个文件,打印4-2、否则就是一个目录,递归调用代码实现publicclassSearchFile{publicstaticvoidmain(String[]args){//初始目录Filedir=newFile("d:/Dev");Datebeg
1.1.1 YARN的介绍 为克服Hadoop1.0中HDFS和MapReduce存在的各种问题⽽提出的,针对Hadoop1.0中的MapReduce在扩展性和多框架⽀持⽅⾯的不⾜,提出了全新的资源管理框架YARN. ApacheYARN(YetanotherResourceNegotiator的缩写)是Hadoop集群的资源管理系统,负责为计算程序提供服务器计算资源,相当于⼀个分布式的操作系统平台,⽽MapReduce等计算程序则相当于运⾏于操作系统之上的应⽤程序。 YARN被引⼊Hadoop2,最初是为了改善MapReduce的实现,但是因为具有⾜够的通⽤性,同样可以⽀持其他的分布式计算模
通常,数组被实现为内存块,集合被实现为HashMap,有序集合被实现为跳跃列表。在Ruby中也是如此吗?我正在尝试从性能和内存占用方面评估Ruby中不同容器的使用情况 最佳答案 数组是Ruby核心库的一部分。每个Ruby实现都有自己的数组实现。Ruby语言规范只规定了Ruby数组的行为,并没有规定任何特定的实现策略。它甚至没有指定任何会强制或至少建议特定实现策略的性能约束。然而,大多数Rubyist对数组的性能特征有一些期望,这会迫使不符合它们的实现变得默默无闻,因为实际上没有人会使用它:插入、前置或追加以及删除元素的最坏情况步骤复
在ruby中,你可以这样做:classThingpublicdeff1puts"f1"endprivatedeff2puts"f2"endpublicdeff3puts"f3"endprivatedeff4puts"f4"endend现在f1和f3是公共(public)的,f2和f4是私有(private)的。内部发生了什么,允许您调用一个类方法,然后更改方法定义?我怎样才能实现相同的功能(表面上是创建我自己的java之类的注释)例如...classThingfundeff1puts"hey"endnotfundeff2puts"hey"endendfun和notfun将更改以下函数定
在Rails自动生成的功能测试(test/functional/products_controller_test.rb)中,我看到以下代码:classProductsControllerTest我的问题是:方法调用products()在哪里/如何定义?products(:one)到底是什么意思?看代码,大概意思是“创建一个产品”,但是它是如何工作的呢?注意我是Ruby/Rails的新手,如果这些是微不足道的问题,我深表歉意。 最佳答案 如果您查看test/fixtures文件夹,您会看到一个products.yml文件。这是在您创建