Wednesday, April 3, 2013

Weird Combiner in Hadoop

I wrote my own MapReduce functions for the K-means problem.

When I test my code on a small dataset (smaller than 1 MB), it works very well. However, when I apply it to a larger one (more than 100 MB), it returns errors.

I am not completely sure about my explanation, but I am about 90% confident that it is right:


  • When I test with the small dataset, the combiner is called just once between the mapper and the reducer.

  • However, when I test with the large dataset, the combiner is called more than once between one mapper/reducer pair. (This looks weird to me, and I am not certain about it, because I haven't studied the Hadoop source code.)
You can find my source code for K-Means here:
https://github.com/zhouhao/Hadoop_KMeans_MapReduce_Java/

Attention for strtok() in C



Header file: #include <string.h>
Function prototype: char *strtok(char *s, const char *delim);
Description: splits the string s into tokens, using the characters in delim as separators.
  1. strtok() scans s. delim is treated as a set of characters: leading delimiter characters are skipped, and the first delimiter character found after a token is overwritten with '\0' [this means the string s is modified].
  2. The first call must pass s as the first argument. After that, call strtok() with NULL as the first argument and it returns a pointer to the next token (when there are no more tokens, strtok() returns NULL).
#include<stdio.h>
#include<string.h>
/*
 * Demonstrates strtok(): splits a sample string on the delimiter set "-, "
 * (dash, comma, space) and prints every token followed by a space, then a
 * newline.
 *
 * Note that ';' is not in the delimiter set, so tokens such as "c656f;gh"
 * keep their semicolons.
 */
int main()
{
    char s[]="ab-c656f;gh,i-jkl;mnop;54;fdfdz";
    const char *delim="-, ";
    char *p;

    /* The first call passes the buffer itself; every later call passes NULL
       so strtok() continues from where it stopped. strtok() overwrites the
       delimiters in s with '\0', so after the first call printing s would
       show only the first token. */
    for(p=strtok(s,delim);p!=NULL;p=strtok(NULL,delim))
    {
        printf("%s ",p);
    }
    printf("\n");
    return 0;
}

-----------------------My work for Strtok()-----------------

/*
 * Checks that a command line carries exactly `num` parameters.
 *
 * The line is split on spaces with strtok(). The first token (the command
 * name) is not counted; every further token counts as one parameter.
 *
 * str: NUL-terminated command line. It is not modified; a private copy is
 *      tokenized because strtok() writes '\0' into the buffer it scans.
 * num: expected number of parameters.
 *
 * Returns 1 when the parameter count equals num. Otherwise prints a
 * diagnostic to stdout and returns 0. A line that does not fit into the
 * MAXLEN_CMD-sized scratch buffer is rejected the same way instead of
 * overflowing the buffer.
 *
 * Note: strtok() keeps hidden global state, so calling this function in the
 * middle of another strtok() sequence disturbs that sequence.
 */
int numOfParametersCheck(const uchar *str,const int num)
{
    int numParameters=0;
    uchar cpyStr[MAXLEN_CMD];

    /* The copy below needs room for the terminating '\0' as well. */
    if(strlen((const char *)str)>=sizeof(cpyStr)/sizeof(cpyStr[0]))
    {
        printf("Cmd is too long!\n\n");
        return 0;
    }
    strcpy((char *)cpyStr,(const char *)str);

    /* Consume the command name, then count what follows. */
    strtok((char *)cpyStr," ");
    while(strtok(NULL," ")!=NULL)
    {
        numParameters++;
        if(numParameters>num)
        {
            /* Too many already: no need to scan the rest. */
            printf("Cmd should have %d parameter!\n\n",num);
            return 0;
        }
    }

    if(numParameters==num)
    {
        return 1;
    }

    /* numParameters<num */
    printf("Cmd should have %d parameter!\n\n",num);
    return 0;
}

Monday, April 1, 2013

LINK : fatal error LNK1123: failure during conversion to COFF: file invalid or corrupt

I re-installed my OS with Windows 7 (x64) and installed .NET Framework 4.5 before VS2010.
As a result, when I build a C/C++ project in VS2010, it fails with the error shown in the title.

How to fix it:
Project Properties 
   -> Configuration Properties 
       -> Linker (General) 
          -> Enable Incremental Linking -> "No (/INCREMENTAL:NO)"
This is the only fix I found that works without re-installing anything.


reference:http://stackoverflow.com/questions/10888391/link-fatal-error-lnk1123-failure-during-conversion-to-coff-file-invalid-or-c

Tuesday, September 18, 2012

8 QUEENS - JAVA


FROM:http://www.cnblogs.com/qinyg/archive/2012/05/21/2512353.html


package sideway;

/**
 * N-queens solver using iterative backtracking, one queen per row.
 *
 * The queen of row 0 is put in a random column; the remaining rows are filled
 * by trying columns left to right and backtracking on dead ends. The first
 * complete placement found is printed.
 */
public class sideway {
    /** Board size: number of rows/columns, and number of queens to place. */
    public static int COLOM_NUMBER=9;

    /** X[k] is the column of the queen in row k; -1 means "not placed yet". */
    public static int X[]=new int[COLOM_NUMBER];

    /**
     * Checks whether the queen of row k, sitting in column X[k], is compatible
     * with the queens of rows 0..k-1.
     *
     * @param k row of the queen to check
     * @return false if it shares a column or a diagonal with an earlier queen,
     *         true if there is no conflict
     */
    public static boolean place(int k)
    {
        for(int i=0;i<k;i++)
        {
            if(X[k]==X[i]||Math.abs(k-i)==Math.abs(X[k]-X[i]))
                return false;
        }
        return true;
    }

    /**
     * Searches for one placement and prints it followed by "succeed!".
     *
     * Row 0 starts in a random column. If the search backtracks to row 0, the
     * row-0 queen moves on to the next column; columns to the left of the
     * random start are never tried. When every remaining possibility is
     * exhausted the method prints "no solution found!" and returns instead of
     * indexing X[-1].
     *
     * The loop starts at row 1, so COLOM_NUMBER must be at least 2.
     */
    public static void queue()
    {
        int k;
        for(int i=0;i<COLOM_NUMBER;i++)
            X[i]=-1;
        // Author's note: the search was observed to fail when X[0] is 1 or 7.
        X[0]=(int) (Math.random() * 1000) % (COLOM_NUMBER);
        k=1;
        while(true)
        {
            // Move queen k to the next candidate column.
            X[k]=X[k]+1;

            // Skip columns that conflict with the queens already placed.
            while(X[k]<COLOM_NUMBER && place(k)==false)
                X[k]=X[k]+1;

            if(X[k]<COLOM_NUMBER && k==COLOM_NUMBER-1)
            {
                // Last row placed successfully: print the result.
                for(int i=0;i<COLOM_NUMBER;i++)
                    System.out.print(X[i]+" ");
                System.out.println("succeed!");
                return;
            }
            else if(X[k]<COLOM_NUMBER && k<COLOM_NUMBER-1)
            {
                // Go on to place the next queen.
                k=k+1;
            }
            else
            {
                // Reset X[k] and backtrack. (Author's note: this can be seen as
                // the "sideway" idea, looking at the local maximum of a row.)
                X[k]=-1;
                k=k-1;
                if(k<0)
                {
                    // Even row 0 ran out of columns: nothing left to try.
                    System.out.println("no solution found!");
                    return;
                }
            }
        }
    }

    public static void main(String[] args) {
        queue();
        System.out.println("done!");
    }

}

Monday, September 17, 2012

8 QUEENS -GENETIC -C

FROM:http://curely.blog.51cto.com/1627940/497963


#include <stdio.h>  
#include <stdlib.h>  
#include <string.h>  
#include <time.h>  
#define N 16//父母产生的代数  

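 // Crossover function: returns the child produced by crossing the two parents (the underlying principle is not explained here; see the design document).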
/*
 * Order-preserving crossover of two 8-gene parents.
 *
 * father, mother: NUL-terminated strings of at most 15 characters. They are
 *                 expected to be permutations of the same eight characters,
 *                 none of which is '0' ('0' is used internally as the
 *                 "already taken" marker). Neither string is modified.
 *
 * For each of the 8 child positions a random bit chooses a parent; the child
 * receives that parent's first gene that has not been used yet, and the gene
 * is marked as used in both parents. With permutation parents the child is
 * therefore a permutation as well.
 *
 * Returns a pointer to the NUL-terminated 8-character child. The buffer has
 * static storage: it stays valid after the call but is overwritten by the
 * next call, so copy the result before calling again. (Returning a pointer
 * to an automatic array would leave the caller with a dangling pointer.)
 */
char *crossover(char* father,char* mother)
{
    static char son[16];
    char rnd[16],fath[16],moth[16];
    int i,j,k;

    /* Crossover mask: '1' takes the gene from father, '0' from mother. */
    for ( i = 0; i < 8; i++)
    {
        rnd[i]=rand()%2+'0';
    }

    /* Work on private copies so the parents stay untouched. */
    strcpy(fath,father);
    strcpy(moth,mother);

    for ( i = 0; i < 8; i++)
    {
        /* donor supplies the gene, other only has it struck out. */
        char *donor = (rnd[i] == '1') ? fath : moth;
        char *other = (rnd[i] == '1') ? moth : fath;

        /* First gene of the donor that is still available. */
        for ( j = 0; j < 8; j++)
        {
            if (donor[j] != '0')
            {
                break;
            }
        }
        son[i]=donor[j];

        /* Strike the same gene out of the other parent... */
        for ( k = 0; k < 8; k++)
        {
            if (other[k] == donor[j])
            {
                other[k]='0';
                break;
            }
        }
        /* ...and out of the donor. */
        donor[j]='0';
    }
    son[8]=0;
    return son;
}


/*
 * Counts the pairs of queens that attack each other along a diagonal.
 *
 * result: at least 8 characters; result[r] encodes the column of the queen
 *         in row r. Only the first 8 entries are read.
 *
 * Two queens in rows lo < hi share a diagonal when the distance between
 * their column codes equals hi - lo. Returns the number of such pairs
 * (0 means no diagonal conflict).
 */
int check(char result[])
{
    int clashes = 0;
    int lo = 0;

    while (lo < 8) {
        int hi = lo + 1;
        while (hi < 8) {
            int gap = result[hi] - result[lo];
            if (gap < 0) {
                gap = -gap;
            }
            if (gap == hi - lo) {
                clashes += 1;
            }
            hi++;
        }
        lo++;
    }
    return clashes;
}

/*
 * Genetic search for 8-queens placements.
 *
 * Starts from two randomly shuffled permutations of "12345678". Each
 * generation breeds N children by crossover plus one random swap (mutation),
 * scores them with check(), sorts them by score, prints every new
 * conflict-free child, and keeps the two best children as the next parents.
 *
 * The run lasts `boundary` generations (initially 1000). Whenever the end is
 * reached and at least one new solution was found since the last extension,
 * the run is extended by 3000 generations. The final value of boundary is
 * printed at the end.
 */
int main()
{
    int i,j,k,p,q,min,max=0,flag,sign,boundary=1000,s[N];
    char father[16]="12345678",mother[16]="87654321";
    char son[N][16],temp[16],result[100][16];
    srand((unsigned)time(NULL));
    for ( i = 0; i < 8; i++)/* shuffle both parent strings */
    {
        j=rand()%8;
        k=father[j];
        father[j]=father[i];
        father[i]=k;
        j=rand()%8;
        k=mother[j];
        mother[j]=mother[i];
        mother[i]=k;
    }
    sign=0;
    for ( i = 0; i <= boundary; i++)/* one iteration per generation */
    {
        /* Dynamic limit: if new solutions were still turning up, breed
           another 3000 generations. */
        if (i == boundary&&sign == 1)
        {
            boundary+=3000;
            sign=0;
        }

        for ( j = 0; j < N; j++)/* crossover, mutation, and scoring of the children */
        {
            /* crossover() returns a buffer that the next call overwrites,
               so copy it out immediately. */
            strcpy(son[j],crossover(mother,father));
            p=rand()%8;
            q=rand()%8;
            k=son[j][p];
            son[j][p]=son[j][q];
            son[j][q]=k;
            s[j]=check(son[j]);
        }

        /* Selection sort by score; collect children that are solutions. */
        for ( j = 0; j < N; j++)
        {
            for ( k = j, min = j; k < N; k++)
            {
                if (s[min] > s[k])
                {
                    min=k;
                }
            }
            /* Swapping an element with itself would make strcpy() copy
               between overlapping (identical) buffers, which is undefined. */
            if (min != j)
            {
                k=s[j];
                s[j]=s[min];
                s[min]=k;
                strcpy(temp,son[j]);
                strcpy(son[j],son[min]);
                strcpy(son[min],temp);
            }
            if (s[j] == 0)
            {
                /* Only record solutions that have not been seen before. */
                flag=0;
                for ( k = 0; k < max; k++)
                {
                    if (!strcmp(son[j],result[k]))
                    {
                        flag=1;
                    }
                }
                if (!flag)
                {
                    sign=1;
                    strcpy(result[max],son[j]);/* store the new solution */
                    max++;/* number of distinct solutions found so far */
                    printf("%02d.%s\n",max,son[j]);
                }
            }
        }

        strcpy(father,son[0]);/* the two best children become the next parents */
        strcpy(mother,son[1]);
    }
    printf("%d\n",boundary);/* total number of generations bred */
    system("pause");/* "pause" is a Windows shell command that keeps the console open */
    return 0;
}



This algorithm lists all the solutions!
GREAT!!!

intro to this blog

I want to become stronger, so I am here to record pieces of my life in computer science!

Cheers!