My Road To Computer Science

Wednesday, April 3, 2013

Weird Combiner in Hadoop

I write my own MapReduce functions for K means problem.

When I test my code using a small dataset(Small than 1M), it works very well. However, when I apply it to larger one(More than 100M), it returns errors.

Actually, I am not so sure about my idea. But I believe it is true at 90%.

When I test small dataset, combiner just be called once between mapper and reducer.

However, when I test large dataset, combiner is called more than once between one pair of mapper and reducer.(It is so weird, and I am not so sure, because I haven't study hadoop source code)

You can find my source code for K-Means here:
https://github.com/zhouhao/Hadoop_KMeans_MapReduce_Java/

Attention for strtok() in C

Head file:#include<string.h>

Function prototype: char * strtok(char *s,const char *delim);

Description:divide string s into pieces according to string delim

strtok() scans s, if it finds the current character is in delim(delim is a character set), then strtok() replaces current character with '\0'[this means string s is changed]
At first, strtok() needs s as a parameter, but then, we can call strtok() by set s as NULL, strtok() will return the next string point(If there is no strings any more, strtok() will return NULL).

#include<stdio.h>

#include<string.h>
int main()
{
char s[]="ab-c656f;gh,i-jkl;mnop;54;fdfdz";
char *delim="-, ";
char *p;
printf("%s ";strtok(s,delim));

//printf("%s ";s); //s is changed
while((p=strtok(NULL,delim)))

{

printf("%s ",p);

}
printf("\n");

return 0;
}
-----------------------My work for Strtok()-----------------

int numOfParametersCheck(const uchar *str,const int num)
{
int numParameters=0;
uchar cpyStr[MAXLEN_CMD];
strcpy(cpyStr,str);//I duplicate a new string
strtok(cpyStr," ");
while(strtok(NULL," ")!=NULL)
{
numParameters++;
if(numParameters>num)
{
printf("Cmd should have %d parameter!\n\n",num);
return 0;
}
}
if(numParameters==num)
{
return 1;
}
else //numParameters<num
{
printf("Cmd should have %d parameter!\n\n",num);
return 0;
}
}

Monday, April 1, 2013

LINK : fatal error LNK1123: failure during conversion to COFF: file invalid or corrupt

When I re-install my OS with Win7(X64).

I installed .net FrameWork 4.5 before VS2010. So when I run C(C++)project in VS2010, it occurs with an error like the title.

How to fix it:

Project Properties 
   -> Configuration Properties 
       -> Linker (General) 
          -> Enable Incremental Linking -> "No (/INCREMENTAL:NO)"

Only this way works without re-install anything.

reference:http://stackoverflow.com/questions/10888391/link-fatal-error-lnk1123-failure-during-conversion-to-coff-file-invalid-or-c

Tuesday, September 18, 2012

8 QUEENS - JAVA

FROM:http://www.cnblogs.com/qinyg/archive/2012/05/21/2512353.html

package sideway;

public class sideway {
public static int COLOM_NUMBER=9;
public static int X[]=new int[COLOM_NUMBER];
public static boolean place(int k)//考察皇后k放置在X[k]列是否发生冲突
{
int i;
for(i=0;i<k;i++)
if(X[k]==X[i]||Math.abs(k-i)==Math.abs(X[k]-X[i]))
return false;
return true;//true代表有冲突
}
public static void queue()
{
int k;
for(int i=0;i<COLOM_NUMBER;i++)
X[i]=-1;
X[0]=(int) (Math.random() * 1000) % (COLOM_NUMBER);//X[0]为1和7的时候出错
k=1;
while(true)
{

X[k]=X[k]+1; //在下一列放置第k个皇后

while(X[k]<COLOM_NUMBER && place(k)==false)
X[k]=X[k]+1;//搜索下一列
//成功得到一个结果，输出结果
if(X[k]<COLOM_NUMBER && k==COLOM_NUMBER-1)
{
for(int i=0;i<COLOM_NUMBER;i++)
System.out.print(X[i]+" ");
System.out.println("succeed!");
return;
}

else
if(X[k]<COLOM_NUMBER && k<COLOM_NUMBER-1)
k=k+1;//放置下一个皇后
else
{
X[k]=-1;//重置X[k],回溯,我觉得这称得上是sideway的思想，考虑的是列的local maximum！
k=k-1;
}
}
}
public static void main(String[] args) {
// TODO Auto-generated method stub
queue();
System.out.println("done!");
}

}

Monday, September 17, 2012

8 QUEENS -GENETIC -C

FROM:http://curely.blog.51cto.com/1627940/497963

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define N 16//父母产生的代数

//交叉函数，返回值为交叉后的子代,具体原理不在这里阐述，在设计文档中有详细说明
char *crossover(char* father,char* mother)//返回son[8]
{
int i,j,k;
char son[16],rnd[16],fath[16],moth[16];

for ( i = 0; i < 8; i++)//生成交叉算子
{
rnd[i]=rand()%2+'0';
}
strcpy(fath,father);
strcpy(moth,mother);

for ( i = 0; i < 8; i ++)//根据交叉算子对父母进行交叉
{
if (rnd[i] == '1')
{
for ( j = 0; j < 8; j++)
{
if (fath[j] != '0')
{
break;
}
}
son[i]=fath[j];
for ( k = 0; k < 8; k++)
{
if (moth[k] == fath[j])
{
moth[k]='0';
break;
}
}
fath[j]='0';
}
else
{
for ( j = 0; j < 8; j++)
{
if (moth[j] != '0')
{
break;
}
}
son[i]=moth[j];
for ( k = 0; k < 8; k++)
{
if (fath[k] == moth[j])
{
fath[k]='0';
break;
}
}
moth[j]='0';
}
}
son[8]=0;
return son;
}

int check(char result[])//检查有多少在一条线上的皇后
{
int i,j,k=0;
for ( i = 0; i < 8; i++)
{
for ( j = i+1; j < 8; j++)
{
if (abs(result[i]-result[j]) == j-i)
{
k++;
}
}
}
return k;
}

int main()
{
int i,j,k,p,q,min,max=0,flag,sign,boundary=1000,s[N];
char father[16]="12345678",mother[16]="87654321";
char son[N][16],temp[16],result[100][16];
srand((unsigned)time(NULL));
for ( i = 0; i < 8; i++)//生成父母串
{
j=rand()%8;
k=father[j];
father[j]=father[i];
father[i]=k;
j=rand()%8;
k=mother[j];
mother[j]=mother[i];
mother[i]=k;
}
sign=0;
for ( i = 0; i <= boundary; i++)//控制繁殖代数
{
if (i == boundary&&sign == 1)//动态控制，如果发现不够继续繁殖3000代
{
boundary+=3000;
sign=0;
}

for ( j = 0; j < N; j++)//交叉和变异，对子代评估
{
strcpy(son[j],crossover(mother,father));
p=rand()%8;
q=rand()%8;
k=son[j][p];
son[j][p]=son[j][q];
son[j][q]=k;
s[j]=check(son[j]);
}

for ( j = 0; j < N; j++)//对评估结果排序，将已经符合条件的挑出
{
for ( k = j, min = j; k < N; k++)
{
if (s[min] > s[k])
{
min=k;
}
}
k=s[j];
s[j]=s[min];
s[min]=k;
strcpy(temp,son[j]);
strcpy(son[j],son[min]);
strcpy(son[min],temp);
if (s[j] == 0)
{
flag=0;
for ( k = 0; k < max; k++)
{
if (!strcmp(son[j],result[k]))
{
flag=1;
}
}
if (!flag)
{
sign=1;
strcpy(result[max],son[j]);//符合的放入结果数组
max++;//已经产生的符合条件的子代
printf("%02d.%s\n",max,son[j]);
}
}
}

strcpy(father,son[0]);//选择两个最优的成为下一代的父母
strcpy(mother,son[1]);
}
printf("%d\n",boundary);//最终繁殖代数
system("pause");
return 0;
}

This algorithm lists all the situations!
GREAT!!!

intro to this blog

I want to be stronger, so I am here to record pieces of my life of computer science!

Cheers!