在运行Vista x64 Business和Visual Studio 2008 SP1的计算机(四核,8GB内存)上,我试图非常快速地将两组数字相交。
我已经在C++中实现了两种方法,而在C#中实现了一种。到目前为止,C#方法更快,我想改进C++方法,使其比C#更快,我希望C++可以做到。
这是C#输出:(发布版本)
Found the intersection 1000 times, in 4741.407 ms
Found the intersection (using unordered_map) 1000 times, in 21580.7ms
Found the intersection (using set_intersection) 1000 times, in 22366.6ms
Found the intersection of 504 values (using unordered_map) 1000 times, in 28827.6ms
Found the intersection of 495 values (using set_intersection) 1000 times, in 9817.69ms
Found the intersection of 504 values (using unordered_set) 1000 times, in 24769.1ms
Code:
// MapPerformance.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>
#include <unordered_set>
#include <boost\unordered\unordered_map.hpp>
#include "timer.h"
using namespace std;
using namespace stdext;
using namespace boost;
using namespace tr1;
int runIntersectionTest2(const vector<int>& set1, const vector<int>& set2)
{
// hash_map<int,int> theMap;
// map<int,int> theMap;
unordered_set<int> theSet;
theSet.insert( set1.begin(), set1.end() );
int intersectionSize = 0;
vector<int>::const_iterator set2_end = set2.end();
for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
{
if ( theSet.find(*iterator) != theSet.end() )
{
intersectionSize++;
}
}
return intersectionSize;
}
int runIntersectionTest(const vector<int>& set1, const vector<int>& set2)
{
// hash_map<int,int> theMap;
// map<int,int> theMap;
unordered_map<int,int> theMap;
vector<int>::const_iterator set1_end = set1.end();
// Now intersect the two sets by populating the map
for ( vector<int>::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator )
{
int value = *iterator;
theMap[value] = 1;
}
int intersectionSize = 0;
vector<int>::const_iterator set2_end = set2.end();
for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
{
int value = *iterator;
unordered_map<int,int>::iterator foundValue = theMap.find(value);
if ( foundValue != theMap.end() )
{
theMap[value] = 2;
intersectionSize++;
}
}
return intersectionSize;
}
int runSetIntersection(const vector<int>& set1_unsorted, const vector<int>& set2_unsorted)
{
// Create two vectors
std::vector<int> set1(set1_unsorted.size());
std::vector<int> set2(set2_unsorted.size());
// Copy the unsorted data into them
std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin());
std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin());
// Sort the data
sort(set1.begin(),set1.end());
sort(set2.begin(),set2.end());
vector<int> intersection;
intersection.reserve(1000);
set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), back_inserter(intersection));
return intersection.size();
}
void createSets( vector<int>& set1, vector<int>& set2 )
{
srand ( time(NULL) );
set1.reserve(100000);
set2.reserve(1000);
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set1.push_back(value);
}
// Try to get half of our values intersecting
float ratio = 200000.0f / RAND_MAX;
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int random = rand() * ratio + 1;
int value = 1000000000 + random;
set2.push_back(value);
}
// Make sure set1 is in random order (not sorted)
random_shuffle(set1.begin(),set1.end());
}
int _tmain(int argc, _TCHAR* argv[])
{
int intersectionSize = 0;
vector<int> set1, set2;
createSets( set1, set2 );
Timer timer;
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runIntersectionTest(set1, set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
timer.Reset();
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runSetIntersection(set1,set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
timer.Reset();
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runIntersectionTest2(set1,set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using unordered_set) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
getchar();
return 0;
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace DictionaryPerformance
{
class Program
{
static void Main(string[] args)
{
List<int> set1 = new List<int>(100000);
List<int> set2 = new List<int>(1000);
// Create 100,000 values for set1
for (int i = 0; i < 100000; i++)
{
int value = 1000000000 + i;
set1.Add(value);
}
Random random = new Random(DateTime.Now.Millisecond);
// Create 1,000 values for set2
for (int i = 0; i < 1000; i++)
{
int value = 1000000000 + (random.Next() % 200000 + 1);
set2.Add(value);
}
long start = System.Diagnostics.Stopwatch.GetTimestamp();
for (int i = 0; i < 1000; i++)
{
runIntersectionTest(set1,set2);
}
long duration = System.Diagnostics.Stopwatch.GetTimestamp() - start;
Console.WriteLine(String.Format("Found the intersection 1000 times, in {0} ms", ((float) duration * 1000.0f) / System.Diagnostics.Stopwatch.Frequency));
Console.ReadKey();
}
static int runIntersectionTest(List<int> set1, List<int> set2)
{
Dictionary<int,int> theMap = new Dictionary<int,int>(100000);
// Now intersect the two sets by populating the map
foreach( int value in set1 )
{
theMap[value] = 1;
}
int intersectionSize = 0;
foreach ( int value in set2 )
{
int count;
if ( theMap.TryGetValue(value, out count ) )
{
theMap[value] = 2;
intersectionSize++;
}
}
return intersectionSize;
}
}
}
// MapPerformance.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>
#include <boost\unordered\unordered_map.hpp>
#include "timer.h"
using namespace std;
using namespace stdext;
using namespace boost;
int runIntersectionTest(vector<int> set1, vector<int> set2)
{
// hash_map<int,int> theMap;
// map<int,int> theMap;
unordered_map<int,int> theMap;
// Now intersect the two sets by populating the map
for ( vector<int>::iterator iterator = set1.begin(); iterator != set1.end(); iterator++ )
{
int value = *iterator;
theMap[value] = 1;
}
int intersectionSize = 0;
for ( vector<int>::iterator iterator = set2.begin(); iterator != set2.end(); iterator++ )
{
int value = *iterator;
unordered_map<int,int>::iterator foundValue = theMap.find(value);
if ( foundValue != theMap.end() )
{
theMap[value] = 2;
intersectionSize++;
}
}
return intersectionSize;
}
int runSetIntersection(set<int> set1, set<int> set2)
{
set<int> intersection;
set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end()));
return intersection.size();
}
int _tmain(int argc, _TCHAR* argv[])
{
srand ( time(NULL) );
vector<int> set1;
vector<int> set2;
set1.reserve(10000);
set2.reserve(1000);
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set1.push_back(value);
}
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int random = rand() % 200000 + 1;
random *= 10;
int value = 1000000000 + random;
set2.push_back(value);
}
Timer timer;
for ( int i = 0; i < 1000; i++ )
{
runIntersectionTest(set1, set2);
}
timer.Stop();
cout << "Found the intersection (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
set<int> set21;
set<int> set22;
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set21.insert(value);
}
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int random = rand() % 200000 + 1;
random *= 10;
int value = 1000000000 + random;
set22.insert(value);
}
timer.Reset();
for ( int i = 0; i < 1000; i++ )
{
runSetIntersection(set21,set22);
}
timer.Stop();
cout << "Found the intersection (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
getchar();
return 0;
}
Found the intersection of 503 values (using unordered_map) 1000 times, in 35131.1ms
Found the intersection of 494 values (using set_intersection) 1000 times, in 10317ms
// MapPerformance.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>
#include <boost\unordered\unordered_map.hpp>
#include "timer.h"
using namespace std;
using namespace stdext;
using namespace boost;
int runIntersectionTest(const vector<int>& set1, const vector<int>& set2)
{
// hash_map<int,int> theMap;
// map<int,int> theMap;
unordered_map<int,int> theMap;
vector<int>::const_iterator set1_end = set1.end();
// Now intersect the two sets by populating the map
for ( vector<int>::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator )
{
int value = *iterator;
theMap[value] = 1;
}
int intersectionSize = 0;
vector<int>::const_iterator set2_end = set2.end();
for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
{
int value = *iterator;
unordered_map<int,int>::iterator foundValue = theMap.find(value);
if ( foundValue != theMap.end() )
{
theMap[value] = 2;
intersectionSize++;
}
}
return intersectionSize;
}
int runSetIntersection(const vector<int> set1_unsorted, const vector<int> set2_unsorted)
{
// Create two vectors
std::vector<int> set1(set1_unsorted.size());
std::vector<int> set2(set2_unsorted.size());
// Copy the unsorted data into them
std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin());
std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin());
// Sort the data
sort(set1.begin(),set1.end());
sort(set2.begin(),set2.end());
vector<int> intersection;
intersection.reserve(1000);
set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end()));
return intersection.size();
}
void createSets( vector<int>& set1, vector<int>& set2 )
{
srand ( time(NULL) );
set1.reserve(100000);
set2.reserve(1000);
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set1.push_back(value);
}
// Try to get half of our values intersecting
float ratio = 200000.0f / RAND_MAX;
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int random = rand() * ratio + 1;
int value = 1000000000 + random;
set2.push_back(value);
}
// Make sure set1 is in random order (not sorted)
random_shuffle(set1.begin(),set1.end());
}
int _tmain(int argc, _TCHAR* argv[])
{
int intersectionSize = 0;
vector<int> set1, set2;
createSets( set1, set2 );
Timer timer;
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runIntersectionTest(set1, set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
timer.Reset();
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runSetIntersection(set1,set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
getchar();
return 0;
}
最佳答案
您的测试有几个问题。
首先,您不是在测试集合交集,而是“创建几个数组,用随机数填充它们,然后执行集合交集”。您应该只计时您真正感兴趣的那部分代码。即使您想做这些事情,也不应在此处对其进行基准测试。一次测量一件事,以减少不确定性。如果希望C++实现的性能更好,则首先需要知道它的哪一部分比预期的要慢。这意味着您必须将设置代码与交叉测试分开。
其次,您应该多次运行测试,以考虑可能的缓存效果和其他不确定性。 (并且可能总共输出1000次运行的总时间,而不是每次输出一次。这样可以减少计时器的不确定性,因为计时器的分辨率可能有限,并且在0-20ms范围内使用时报告的结果不准确。
此外,据我可以从文档中读取的内容,应该对set_intersection的输入进行排序,而set2不会。似乎没有理由使用unordered_map,而unordered_set将更适合您的工作。
关于所需的设置代码,请注意,您可能不需要填充 vector 即可运行相交。您自己的实现和set_intersection都已在迭代器上运行,因此您只需将它们的一对迭代器传递给您输入中已经存在的数据结构即可。
有关您的代码的其他一些具体注释:
++iterator代替iterator++ unordered_set(不是unordered_map)进行实验std::set_intersection on std::set: 2606ms
std::set_intersection on tr1::unordered_set: 1014ms
std::set_intersection on sorted vectors: 171ms
std::set_intersection on unsorted vectors: 10140ms
#define _SECURE_SCL 0
#include <ctime>
#include <vector>
#include <set>
#include <iostream>
#include <algorithm>
#include <unordered_set>
#include <windows.h>
template <typename T, typename OutIter>
void stl_intersect(const T& set1, const T& set2, OutIter out){
std::set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), out);
}
template <typename T, typename OutIter>
void sort_stl_intersect(T& set1, T& set2, OutIter out){
std::sort(set1.begin(), set1.end());
std::sort(set2.begin(), set2.end());
std::set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), out);
}
template <typename T>
void init_sorted_vec(T first, T last){
for ( T cur = first; cur != last; ++cur)
{
int i = cur - first;
int value = 1000000000 + i;
*cur = value;
}
}
template <typename T>
void init_unsorted_vec(T first, T last){
for ( T cur = first; cur != last; ++cur)
{
int i = rand() % 200000 + 1;
i *= 10;
int value = 1000000000 + i;
*cur = value;
}
}
struct resize_and_shuffle {
resize_and_shuffle(int size) : size(size) {}
void operator()(std::vector<int>& vec){
vec.resize(size);
}
int size;
};
int main()
{
srand ( time(NULL) );
std::vector<int> out(100000);
std::vector<int> sortedvec1(100000);
std::vector<int> sortedvec2(1000);
init_sorted_vec(sortedvec1.begin(), sortedvec1.end());
init_unsorted_vec(sortedvec2.begin(), sortedvec2.end());
std::sort(sortedvec2.begin(), sortedvec2.end());
std::vector<int> unsortedvec1(sortedvec1.begin(), sortedvec1.end());
std::vector<int> unsortedvec2(sortedvec2.begin(), sortedvec2.end());
std::random_shuffle(unsortedvec1.begin(), unsortedvec1.end());
std::random_shuffle(unsortedvec2.begin(), unsortedvec2.end());
std::vector<int> vecs1[1000];
std::vector<int> vecs2[1000];
std::fill(vecs1, vecs1 + 1000, unsortedvec1);
std::fill(vecs2, vecs2 + 1000, unsortedvec2);
std::set<int> set1(sortedvec1.begin(), sortedvec1.end());
std::set<int> set2(sortedvec2.begin(), sortedvec2.end());
std::tr1::unordered_set<int> uset1(sortedvec1.begin(), sortedvec1.end());
std::tr1::unordered_set<int> uset2(sortedvec2.begin(), sortedvec2.end());
DWORD start, stop;
DWORD delta[4];
start = GetTickCount();
for (int i = 0; i < 1000; ++i){
stl_intersect(set1, set2, out.begin());
}
stop = GetTickCount();
delta[0] = stop - start;
start = GetTickCount();
for (int i = 0; i < 1000; ++i){
stl_intersect(uset1, uset2, out.begin());
}
stop = GetTickCount();
delta[1] = stop - start;
start = GetTickCount();
for (int i = 0; i < 1000; ++i){
stl_intersect(sortedvec1, sortedvec2, out.begin());
}
stop = GetTickCount();
delta[2] = stop - start;
start = GetTickCount();
for (int i = 0; i < 1000; ++i){
sort_stl_intersect(vecs1[i], vecs1[i], out.begin());
}
stop = GetTickCount();
delta[3] = stop - start;
std::cout << "std::set_intersection on std::set: " << delta[0] << "ms\n";
std::cout << "std::set_intersection on tr1::unordered_set: " << delta[1] << "ms\n";
std::cout << "std::set_intersection on sorted vectors: " << delta[2] << "ms\n";
std::cout << "std::set_intersection on unsorted vectors: " << delta[3] << "ms\n";
return 0;
}
malloc操作。在.NET中,堆分配只不过是向指针添加偏移量而已。set_difference仅适用于排序的输入,并且为了重现涉及排序的测试结果,我们必须在每次迭代中复制未排序的数据的新拷贝,这非常昂贵(尽管再次使用自定义分配器会有所帮助)很多)。我不知道您的输入采用什么形式,但是有可能您可以直接对输入进行排序,而无需复制它,然后直接在其上运行set_difference。 (如果您的输入至少是一个数组或一个STL容器,那将很容易做到。)std::sort和set_intersection来摆脱困境。关于c++ - 集: C++ vs C#的快速交集,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/1060648/
我的瘦服务器配置了nginx,我的ROR应用程序正在它们上运行。在我发布代码更新时运行thinrestart会给我的应用程序带来一些停机时间。我试图弄清楚如何优雅地重启正在运行的Thin实例,但找不到好的解决方案。有没有人能做到这一点? 最佳答案 #Restartjustthethinserverdescribedbythatconfigsudothin-C/etc/thin/mysite.ymlrestartNginx将继续运行并代理请求。如果您将Nginx设置为使用多个上游服务器,例如server{listen80;server
如何将send与+=一起使用?a=20;a.send"+=",10undefinedmethod`+='for20:Fixnuma=20;a+=10=>30 最佳答案 恐怕你不能。+=不是方法,而是语法糖。参见http://www.ruby-doc.org/docs/ProgrammingRuby/html/tut_expressions.html它说Incommonwithmanyotherlanguages,Rubyhasasyntacticshortcut:a=a+2maybewrittenasa+=2.你能做的最好的事情是:
我对如何计算通过{%assignvar=0%}赋值的变量加一完全感到困惑。这应该是最简单的任务。到目前为止,这是我尝试过的:{%assignamount=0%}{%forvariantinproduct.variants%}{%assignamount=amount+1%}{%endfor%}Amount:{{amount}}结果总是0。也许我忽略了一些明显的东西。也许有更好的方法。我想要存档的只是获取运行的迭代次数。 最佳答案 因为{{incrementamount}}将输出您的变量值并且不会影响{%assign%}定义的变量,我
我有一个数组数组,想将元素附加到子数组。+=做我想做的,但我想了解为什么push不做。我期望的行为(并与+=一起工作):b=Array.new(3,[])b[0]+=["apple"]b[1]+=["orange"]b[2]+=["frog"]b=>[["苹果"],["橙子"],["Frog"]]通过推送,我将推送的元素附加到每个子数组(为什么?):a=Array.new(3,[])a[0].push("apple")a[1].push("orange")a[2].push("frog")a=>[[“苹果”、“橙子”、“Frog”]、[“苹果”、“橙子”、“Frog”]、[“苹果”、“
有没有办法让Ruby能够做这样的事情?classPlane@moved=0@x=0defx+=(v)#thisiserror@x+=v@moved+=1enddefto_s"moved#{@moved}times,currentxis#{@x}"endendplane=Plane.newplane.x+=5plane.x+=10putsplane.to_s#moved2times,currentxis15 最佳答案 您不能在Ruby中覆盖复合赋值运算符。任务在内部处理。您应该覆盖+,而不是+=。plane.a+=b与plane.a=
有没有办法快速将表格格式的ruby哈希打印到文件中?如:keyAkeyBkeyC...1232343451253474456...其中散列的值是不同大小的数组。还是使用双循环是唯一的方法?谢谢 最佳答案 试试我写的这个gem(在表中打印散列、ruby对象、ActiveRecord对象):http://github.com/arches/table_print 关于ruby-如何以表格格式快速打印Ruby哈希值?,我们在StackOverflow上找到一个类似的问题:
出于某种原因,heroku尝试要求dm-sqlite-adapter,即使它应该在这里使用Postgres。请注意,这发生在我打开任何URL时-而不是在gitpush本身期间。我构建了一个默认的Facebook应用程序。gem文件:source:gemcuttergem"foreman"gem"sinatra"gem"mogli"gem"json"gem"httparty"gem"thin"gem"data_mapper"gem"heroku"group:productiondogem"pg"gem"dm-postgres-adapter"endgroup:development,:t
我是Ruby和这个网站的新手。下面两个函数是不同的,一个在函数外修改变量,一个不修改。defm1(x)x我想确保我理解正确-当调用m1时,对str的引用被复制并传递给将其视为x的函数。运算符当调用m2时,对str的引用被复制并传递给将其视为x的函数。运算符+创建一个新字符串,赋值x=x+"4"只是将x重定向到新字符串,而原始str变量保持不变。对吧?谢谢 最佳答案 String#+::str+other_str→new_strConcatenation—ReturnsanewStringcontainingother_strconc
我正在使用PostgreSQL9.1.3(x86_64-pc-linux-gnu上的PostgreSQL9.1.3,由gcc-4.6.real(Ubuntu/Linaro4.6.1-9ubuntu3)4.6.1,64位编译)和在ubuntu11.10上运行3.2.2或3.2.1。现在,我可以使用以下命令连接PostgreSQLsupostgres输入密码我可以看到postgres=#我将以下详细信息放在我的config/database.yml中并执行“railsdb”,它工作正常。开发:adapter:postgresqlencoding:utf8reconnect:falsedat
这是我在ChefRecipe中的一blockRuby:#ifdatadirdoesn'texist,moveoverthedefaultoneif!File.exist?("/vol/postgres/data")execute"mv/var/lib/postgresql/9.1/main/vol/postgres/data"end结果是:Executingmv/var/lib/postgresql/9.1/main/vol/postgres/datamv:inter-devicemovefailed:`/var/lib/postgresql/9.1/main'to`/vol/post