#include <map>
using namespace std;
/*****************************************************************************************************************************************************************************
*
* 一个极其简单的代码预处理工具
* 仿造C语言的预处理,对和C语言变量名命名规则有相同规则的语言进行预处理,
* 经典例子:Lua脚本的预处理,通过define 将全局变量名称转为不可读变量(类似混淆,但实际解决的问题是local变量不足时用全局变量代替,但又不希望发布的脚本中带有任何有意义的信息)
* 同时,扩充一些支持比较的预处理和命令行预置宏,处理版本兼容时的条件编译
* 这个工具只实现以下功能:
* (1)#define Macro Code
* (2)#ifdef Macro ---#else --- #endif
* (3)#ifndef Macro --#else ---#endif
* (4)#undef Macro
*
* [*]此工程必须用unicode版本编译
*****************************************************************************************************************************************************************************/
// Source-language family of the input file. It selects which quote, escape and
// line-comment characters are recognized while scanning a line.
enum CodeType
{
CT_none,
CT_Lua,
CT_Cpp,
CT_Java,
CT_Vb,
CT_Delphi,// the string escape symbol is '#' instead of '\'
};
// Language currently in effect; GetCodeType() assigns it from the input file name.
CodeType g_CodeType = CT_none;
// Character encoding of the file being processed.
enum FileCodePageType
{
FCP_none,
FCP_Unicode,
FCP_Utf8,
FCP_Multibyte,
};
// Encoding in effect for the current file; assigned in DoPreCompileCode().
FileCodePageType g_FCPType = FCP_Multibyte;
FileCodePageType g_ForceFCPType = FCP_none;// encoding forced from the command line (FCP_none: detect it from the BOM)
// Macro tables mapping a macro name to its replacement code.
// g_mapMacro is used for wide-character text (the project is required to be a
// Unicode build, see the file header); g_mapMacroA is used for narrow text.
map<CString, CString> g_mapMacro;
map<CStringA, CStringA> g_mapMacroA;
// Stores Code under MacroName in the wide macro table (g_mapMacro),
// replacing any definition already registered under that name.
__inline void AddMacro(LPCWSTR MacroName, LPCWSTR Code)
{
g_mapMacro[MacroName] = Code;
}
// Stores Code under MacroName in the narrow macro table (g_mapMacroA),
// replacing any definition already registered under that name.
__inline void AddMacro(LPCSTR MacroName, LPCSTR Code)
{
g_mapMacroA[MacroName] = Code;
}
// Removes a macro from the wide macro table; an unknown name is ignored.
__inline void DeleteMacro(LPCWSTR lpMacro)
{
    // Erasing by key is a no-op when the key is absent.
    g_mapMacro.erase(CString(lpMacro));
}
// Removes a macro given by its narrow-character name.
// The project is built as Unicode, so when the source text is ANSI/UTF-8 the
// macros preset by the program live in the wide table; the name is therefore
// removed from both tables.
__inline void DeleteMacro(LPCSTR lpMacro)
{
    map<CStringA, CStringA>::iterator i = g_mapMacroA.find(lpMacro);
    if(i != g_mapMacroA.end())
    {
        g_mapMacroA.erase(i);
    }
    // Resolves to the wide overload, which clears the preset (wide) entry.
    DeleteMacro(CStringW(CStringA(lpMacro)));
}
// Returns TRUE when a macro with this name exists in the wide macro table.
__inline BOOL EixstMacro(LPCWSTR lpMacro)
{
    return (g_mapMacro.find(lpMacro) == g_mapMacro.end()) ? FALSE : TRUE;
}
// Returns TRUE when a macro with this narrow-character name is defined.
// The project is built as Unicode, so for ANSI/UTF-8 sources the macros preset
// by the program are stored in the wide table and must be searched as well.
__inline BOOL EixstMacro(LPCSTR lpMacro)
{
    const bool bInNarrowTable = (g_mapMacroA.find(lpMacro) != g_mapMacroA.end());
    if(bInNarrowTable)
    {
        return TRUE;
    }
    const bool bInWideTable = (g_mapMacro.find(CStringW(CStringA(lpMacro))) != g_mapMacro.end());
    return bInWideTable ? TRUE : FALSE;
}
// Looks up a macro in the wide macro table.
// On success copies its replacement code into Code and returns TRUE;
// otherwise leaves Code untouched and returns FALSE.
__inline BOOL GetMarcoCode(LPCWSTR lpMacro, CString& OUT Code)
{
    const map<CString, CString>::iterator hit = g_mapMacro.find(lpMacro);
    if(hit == g_mapMacro.end())
    {
        return FALSE;
    }
    Code = hit->second;
    return TRUE;
}
// Looks up a macro in the narrow macro table.
// On success copies its replacement code into Code and returns TRUE;
// otherwise leaves Code untouched and returns FALSE.
__inline BOOL GetMarcoCode(LPCSTR lpMacro, CStringA& OUT Code)
{
    const map<CStringA, CStringA>::iterator hit = g_mapMacroA.find(lpMacro);
    if(hit == g_mapMacroA.end())
    {
        return FALSE;
    }
    Code = hit->second;
    return TRUE;
}
// Condition stack: the entry at the top tells whether code output is currently
// enabled. Slot 0 is the base level and starts out enabled.
BOOL g_Stack_Enable[1000] = {1};
// Index of the current top entry. It is an array index, not a flag, hence int.
int g_StackEnableTop = 0;
// Opens a new conditional level whose output state is bEnable.
// When the stack is already full the request is reported and dropped instead
// of writing past the end of g_Stack_Enable.
__inline void PushConditionEnableCode(BOOL bEnable)
{
    const int capacity = static_cast<int>(sizeof(g_Stack_Enable) / sizeof(g_Stack_Enable[0]));
    if(g_StackEnableTop + 1 >= capacity)
    {
        printf("Conditional nesting is too deep (limit %d), directive ignored\n", capacity - 1);
        return;
    }
    g_Stack_Enable[++g_StackEnableTop] = bEnable;
}
// Closes the innermost conditional level.
// The base level (index 0) is never popped, so an unmatched #endif cannot move
// the index below the start of g_Stack_Enable.
__inline void PopCondition()
{
    if(g_StackEnableTop > 0)
    {
        g_StackEnableTop--;
    }
}
// Flips the output state of the innermost conditional level (used for #else).
__inline void InvCurrentCondition()
{
    if(g_Stack_Enable[g_StackEnableTop])
    {
        g_Stack_Enable[g_StackEnableTop] = 0;
    }
    else
    {
        g_Stack_Enable[g_StackEnableTop] = 1;
    }
}
// Returns 1 when the innermost conditional level allows output, 0 otherwise.
__inline BOOL CurrentConditionEnable()
{
    return g_Stack_Enable[g_StackEnableTop] != 0;
}
//字符串 和文件
// Writes cch wide characters from str to fp as raw bytes.
__inline void WriteToFile(FILE* fp, const wchar_t* str, int cch)
{
    const size_t byteCount = cch * sizeof(wchar_t);
    fwrite(str, 1, byteCount, fp);
}
// Writes cch narrow characters from str to fp.
__inline void WriteToFile(FILE* fp, const char* str, int cch)
{
    const size_t byteCount = cch;
    fwrite(str, 1, byteCount, fp);
}
// Returns true when the first Len characters of pStr and pMat are equal.
// A Len of zero or less compares nothing and yields true.
template<typename ChType>
bool StrMatch(const ChType* pStr, int Len, const ChType* pMat)
{
    int remaining = Len;
    while(remaining-- > 0)
    {
        if(*pStr++ != *pMat++)
        {
            return false;
        }
    }
    return true;
}
__inline void OutConsoleErrPreCompile(int lineNum, LPCWSTR lpStr, int cch)
{
CString str(lpStr, cch);
printf("Invalid PreCompile Cmd At Line %d: %s\n", lineNum, CStringA(str));
}
__inline void OutConsoleErrPreCompile(int lineNum, LPCSTR lpStr, int cch)
{
CStringA str(lpStr, cch);
CA2W ws((LPCSTR)str, (g_FCPType == FCP_Utf8 ? CP_UTF8 : CP_ACP));
CStringW wstr(ws.m_psz);
printf("Invalid PreCompile Cmd At Line %d: %s\n", lineNum, CStringA(wstr));
}
// Writes a single narrow character to fp.
__inline void WriteChar(FILE* fp, char ch)
{
    fputc(ch, fp);
}
// Writes a single wide character to fp as raw bytes.
__inline void WriteChar(FILE* fp, WCHAR ch)
{
    // Use the real character size instead of a hard-coded 2, like WriteToFile does.
    fwrite(&ch, 1, sizeof(ch), fp);
}
// Forward declaration: the expansion below recurses into the line writer, which
// is defined later in the file. A template must be declared before it can be
// named with explicit template arguments.
template<typename ChType, typename strType>
void DoOutPutLineAndReplace(FILE* fp, const ChType* pString, int LineLen, int LineNumber);

// Writes one identifier to fp, replacing it by its macro code when it is a macro.
// The replacement code is scanned again, so macros inside it are expanded too.
// A macro that is already being expanded is written verbatim (as the C
// preprocessor does), so "#define A A" cannot recurse without bound.
//   fp         - output file
//   nameX      - the identifier that was read
//   LineNumber - source line, passed through to the nested expansion
template<typename ChType, typename StrType>
void WriteNameOrReplace(FILE* fp, const StrType& IN nameX, int LineNumber)
{
    // Names whose expansion is in progress (one table per character type).
    static std::map<StrType, bool> s_expanding;
    StrType strCode;
    if(s_expanding.find(nameX) == s_expanding.end() && GetMarcoCode(nameX, strCode))
    {
        s_expanding[nameX] = true;
        DoOutPutLineAndReplace<ChType, StrType>(fp, strCode, strCode.GetLength(), LineNumber); // recursive expansion
        s_expanding.erase(nameX);
    }
    else
    {
        WriteToFile(fp, nameX, nameX.GetLength());
    }
}
//工作逻辑
// Returns TRUE for characters that may start an identifier: ASCII letters and '_'.
template<typename ChType>
__inline BOOL IsLetter(ChType ch)
{
    const bool bLower = (ch >= 'a' && ch <= 'z');
    const bool bUpper = (ch >= 'A' && ch <= 'Z');
    return (bLower || bUpper || ch == '_') ? TRUE : 0;
}
// Returns TRUE for the ASCII digits '0'..'9'.
template<typename ChType>
__inline BOOL IsDigital(ChType ch)
{
    const bool bDigit = !(ch < '0' || ch > '9');
    return bDigit ? TRUE : 0;
}
// Updates the "inside a string literal" state for one character.
// Quotes are assumed to be correctly paired in the source text.
//   ch         - the character being examined
//   bInQuote   - in/out: inside the language's main string literal
//                (double quotes; single quotes for Delphi)
//   bInQuoteS  - in/out: inside a single-quoted literal (non-Delphi languages)
//   bChUsed    - set to true when ch was consumed as a quote or escape character
//   i_ReadPos, ptrReadPos - both advanced by one when an escape character inside
//                a literal makes the following character part of the literal
template<typename ChType>
void __ProcessLineCode_InQuoteAndOutQuote(ChType ch, BOOL& IN OUT bInQuote, BOOL& IN OUT bInQuoteS, bool& IN OUT bChUsed, int & IN OUT i_ReadPos, const ChType* & IN OUT ptrReadPos)
{
    // Languages that use '\' as the escape character inside literals.
    const bool bBackslashEscapes = (g_CodeType == CT_Cpp || g_CodeType == CT_Java || g_CodeType == CT_Lua || g_CodeType == CT_none);
    // Languages where '"' delimits the main literal and '\'' a secondary one.
    const bool bDoubleQuoteLang = (bBackslashEscapes || g_CodeType == CT_Vb);
    const bool bMainDelimiter =
        (ch == '\"' && bDoubleQuoteLang) ||
        (ch == '\'' && g_CodeType == CT_Delphi); // Delphi strings use single quotes

    if(bMainDelimiter)
    {
        // A double quote inside a single-quoted literal is plain text and must
        // not open a string (mirrors the single-quote handling below).
        if(!bInQuoteS)
        {
            bInQuote ^= 1;
        }
        bChUsed = true;
    }
    else if(ch == '\\' && bBackslashEscapes)
    {
        // Escapes only matter inside a literal. Delphi ('#') and VB (doubled
        // quotes) never break the even-number-of-quotes rule, so they need no
        // special handling here.
        if(bInQuote || bInQuoteS)
        {
            i_ReadPos++; // skip the escaped character
            ptrReadPos++;
            bChUsed = true;
        }
    }
    else if(ch == '\'' && bDoubleQuoteLang)
    {
        // A single quote inside a double-quoted literal is plain text.
        if(!bInQuote)
        {
            bInQuoteS ^= 1;
        }
        bChUsed = true;
    }
}
template<typename ChType, typename strType>
void DoOutPutLineAndReplace(FILE* fp, const ChType* pString, int LineLen, int LineNumber)
{
//输出行 并且带有宏替换
//不带引号,字母数字构成的连续字符串都可以替换
int bInQuote = 0; //双引号中
int bInQuoteS = 0; //单引号中
int letterStart = 0;
BOOL bInNameGet = FALSE;
for(int i = 0; i < LineLen; i++)
{
bool bChUsed = false; //字符被处理了
ChType ch = pString[i];
if(bInNameGet && !IsLetter(ch) && !IsDigital(ch))
{
bInNameGet = FALSE;
//Got a name
strType nameX(pString + letterStart, i - letterStart);
WriteNameOrReplace<ChType, strType>(fp, nameX, LineNumber);
}
const ChType* pJump = 0;
__ProcessLineCode_InQuoteAndOutQuote(ch, bInQuote, bInQuoteS, bChUsed, i, pJump);
if(bChUsed)
{
WriteChar(fp, ch);
if(pJump == (ChType*)1) //跳过了一个字符
{
WriteChar(fp, pString[i]);
}
continue;
}
if(bInQuote || bInQuoteS) //字符串没完事
{
WriteChar(fp, ch);
if(pJump == (ChType*)1) //跳过了一个字符
{
WriteChar(fp, pString[i]);
}
continue;
}
if(IsLetter(ch) && !bInNameGet)
{
bInNameGet = TRUE;
letterStart = i; //
}
if(!bInNameGet)
WriteChar(fp, ch);
}
//remain
if(bInNameGet && LineLen > letterStart)
{
//Got a name
strType nameX(pString + letterStart, LineLen - letterStart);
WriteNameOrReplace<ChType, strType>(fp, nameX, LineNumber);
}
}
// Returns lpCode with a trailing line comment removed.
// The comment marker depends on g_CodeType: "//" for C++/Delphi/Java, a single
// quote for VB, and "--" for Lua and every other setting. A marker inside a
// string literal does not start a comment.
//   lpCode - NUL-terminated text to strip
template<typename ChType, typename strType>
strType EraseRemark(const ChType* lpCode)
{
    static ChType Str_Lua[2] = {'-', '-'};
    static ChType Str_Cpp[2] = {'/', '/'};
    static ChType Str_VB[1] = {'\''};
    // Lua-style comments are the default.
    ChType* pMark = Str_Lua;
    int MarkLen = _countof(Str_Lua);
    if(g_CodeType == CT_Cpp || g_CodeType == CT_Delphi || g_CodeType == CT_Java)
    {
        pMark = Str_Cpp;
        MarkLen = _countof(Str_Cpp);
    }
    else if(g_CodeType == CT_Vb)
    {
        pMark = Str_VB;
        MarkLen = _countof(Str_VB);
    }
    int bInQuote = 0;
    int bInQuoteS = 0;
    const ChType* lpStart = lpCode;
    while(*lpCode != 0)
    {
        const ChType ch = (*lpCode);
        // Outside of literals the marker ends the code part of the line.
        if(!bInQuoteS && !bInQuote && StrMatch<ChType>(lpCode, MarkLen, pMark))
        {
            return strType(lpStart, static_cast<int>(lpCode - lpStart));
        }
        int i_NoUsed = 0;
        bool bChUsed = false;
        __ProcessLineCode_InQuoteAndOutQuote(ch, bInQuote, bInQuoteS, bChUsed, i_NoUsed, lpCode);
        // An escape character may have advanced lpCode onto the terminator;
        // stepping once more would run past the end of the string.
        if(*lpCode == 0)
        {
            break;
        }
        lpCode++;
    }
    return lpStart;
}
// Processes one source line.
// A line starting with '#' is a directive (#define, #undef, #ifdef, #ifndef,
// #else, #endif); it updates the macro tables or the condition stack and
// produces no output. Any other line is written with macro replacement while
// output is enabled. Trailing line comments are removed from directive
// arguments according to the source language.
// Conditionals that open inside a disabled region are only counted, so their
// #else / #endif cannot invert or close the enclosing level.
//   fpOut      - output file
//   pString    - start of the line (LineLen characters, line break included)
//   LineLen    - number of characters in the line
//   LineNumber - line number used in error messages
template<typename ChType, typename strType>
void ProcessOneLine(FILE* fpOut, ChType* pString, int LineLen, int LineNumber)
{
    if(LineLen == 0)
    {
        static ChType RN[2] = {'\r', '\n'};
        WriteToFile(fpOut, RN, 2);
        return;
    }
    if(pString[0] != '#')
    {
        // Ordinary code line.
        if(CurrentConditionEnable())
        {
            DoOutPutLineAndReplace<ChType, strType>(fpOut, pString, LineLen, LineNumber);
        }
        return;
    }

    // Preprocessor directive.
    static ChType Str_EMPTY[1] = {0};
    static ChType Str_Define[7] = {'#', 'd', 'e', 'f', 'i', 'n', 'e',};
    static ChType Str_IfDef[6] = {'#', 'i', 'f', 'd', 'e', 'f',};
    static ChType Str_IfNdef[7] = {'#', 'i', 'f', 'n', 'd', 'e', 'f',};
    static ChType Str_Else[5] = {'#', 'e', 'l', 's', 'e',};
    static ChType Str_endif[6] = {'#', 'e', 'n', 'd', 'i', 'f'};
    static ChType Str_undef[6] = {'#', 'u', 'n', 'd', 'e', 'f'};
    // Number of conditionals that were opened while output was already
    // disabled. They have no entry on the condition stack.
    static int s_SkippedLevels = 0;

    const bool bIfDef = LineLen > 6 && StrMatch(pString, 6, Str_IfDef);
    const bool bIfNdef = LineLen > 7 && StrMatch(pString, 7, Str_IfNdef);

    if(LineLen > 7 && StrMatch(pString, 7, Str_Define)) //#define MACRO CODE
    {
        if(CurrentConditionEnable()) // definitions only count while output is enabled
        {
            strType str(pString + 7, LineLen - 7);
            str.TrimLeft();
            int fk = str.Find((ChType)' ');
            if(fk > 0)
            {
                strType strName = str.Left(fk);
                strName.Trim();
                strType strCode = str.Mid(fk + 1);
                strType strCodeNoRemark = EraseRemark<ChType, strType>(strCode);
                strCodeNoRemark.TrimRight();
                AddMacro(strName, strCodeNoRemark);
            }
            else //#define MACRO
            {
                strType strMacroNoRemark = EraseRemark<ChType, strType>(str);
                strMacroNoRemark.Trim();
                AddMacro(strMacroNoRemark, Str_EMPTY);
            }
        }
    }
    else if(LineLen > 6 && StrMatch(pString, 6, Str_undef)) //#undef MACRO
    {
        if(CurrentConditionEnable())
        {
            strType str(pString + 6, LineLen - 6);
            strType strNoRemark = EraseRemark<ChType, strType>(str);
            strNoRemark.Trim();
            DeleteMacro(strNoRemark);
        }
    }
    else if(bIfDef || bIfNdef) //#ifdef MACRO / #ifndef MACRO
    {
        if(CurrentConditionEnable())
        {
            const int nKeywordLen = bIfDef ? 6 : 7;
            strType str(pString + nKeywordLen, LineLen - nKeywordLen);
            strType strNoRemark = EraseRemark<ChType, strType>(str);
            strNoRemark.Trim();
            const bool bDefined = (EixstMacro(strNoRemark) != FALSE);
            // #ifdef enables on "defined", #ifndef on "not defined".
            PushConditionEnableCode((bDefined == bIfDef) ? TRUE : FALSE);
        }
        else
        {
            // Opened inside a disabled region: remember it so that its
            // #else / #endif are not applied to the enclosing level.
            s_SkippedLevels++;
        }
    }
    else if(LineLen >= 5 && StrMatch(pString, 5, Str_Else)) //#else
    {
        // An #else of a skipped conditional must stay disabled.
        if(s_SkippedLevels == 0)
        {
            InvCurrentCondition();
        }
    }
    else if(LineLen >= 6 && StrMatch(pString, 6, Str_endif)) //#endif
    {
        if(s_SkippedLevels > 0)
        {
            s_SkippedLevels--;
        }
        else
        {
            PopCondition();
        }
    }
    else
    {
        // Unknown directive.
        OutConsoleErrPreCompile(LineNumber, pString, LineLen);
    }
}
// Splits a NUL-terminated buffer into lines and feeds each non-empty line to
// ProcessOneLine(). A line includes its terminating '\n'; line numbers start at 1.
//   fpOut   - output file
//   pString - NUL-terminated text of the whole file
template<typename ChType, typename strType>
void DoPreCompileCodeT(FILE* fpOut, ChType* pString)
{
    ChType* pLineBegin = pString;
    int lineNum = 1;
    for(;;)
    {
        const ChType ch = *pString++;
        const bool bAtEnd = (ch == 0);
        if(bAtEnd || ch == '\n')
        {
            // pString already points past the character that ended the line.
            int LineLen = pString - pLineBegin;
            if(bAtEnd)
            {
                LineLen--; // the terminating 0 is not part of the line
            }
            if(LineLen > 0)
            {
                ProcessOneLine<ChType, strType>(fpOut, pLineBegin, LineLen, lineNum);
            }
            lineNum++;
            pLineBegin = pString;
        }
        if(bAtEnd)
        {
            break;
        }
    }
}
// Preprocesses the file strFileIn and writes the result to strFileOut.
// The whole input is read into memory. Its encoding is taken from
// g_ForceFCPType when one was forced, otherwise from the byte order mark:
// a UTF-16 BOM selects wide-character processing, anything else narrow
// processing (UTF-8 with its BOM, multibyte without). A recognized BOM is
// copied to the output and skipped, so a directive on the first line is still
// recognized. Errors are reported on the console.
void DoPreCompileCode(CString strFileIn, CString strFileOut)
{
    FILE* fpInput = _wfopen(strFileIn, L"rb");
    if(!fpInput)
    {
        // wprintf is variadic: pass the character pointer, not the CString object.
        wprintf(L"cannot open input file %s\n", strFileIn.GetString());
        return;
    }

    // Determine the file size; ftell reports failure with a negative value.
    long flen = -1;
    if(fseek(fpInput, 0, SEEK_END) == 0)
    {
        flen = ftell(fpInput);
        fseek(fpInput, 0, SEEK_SET);
    }
    if(flen < 0)
    {
        printf("cannot get input file size\n");
        fclose(fpInput);
        return;
    }

    // The extra zero bytes terminate the text for both character widths.
    BYTE* pBuf = new(nothrow) BYTE[flen + 10];
    if(!pBuf)
    {
        printf("cannot alloc buf\n");
        fclose(fpInput);
        return;
    }
    memset(pBuf, 0, flen + 10);
    // Only trust the bytes that were actually read.
    flen = static_cast<long>(fread(pBuf, 1, flen, fpInput));
    fclose(fpInput);

    FILE* fpOut = _wfopen(strFileOut, L"wb+");
    if(!fpOut)
    {
        wprintf(L"cannot open output file %s\n", strFileOut.GetString());
        delete[] pBuf;
        return;
    }

    const WORD bom = 0xFEFF;
    // EF BB BF is the UTF-8 BOM; it must be skipped or a '#' directive on the
    // first line would not be recognized.
    const BYTE utf8Bom[3] = {0xEF, 0xBB, 0xBF};
    INT nHasUnicodeBOM = 0; // size in bytes of the UTF-16 BOM found (0 = none)
    INT nHasUtf8BOM = 0;    // size in bytes of the UTF-8 BOM found (0 = none)
    if(flen >= 2 && (*((WORD*)pBuf)) == bom)
        nHasUnicodeBOM = 2;
    if(flen >= 3 && pBuf[0] == utf8Bom[0] && pBuf[1] == utf8Bom[1] && pBuf[2] == utf8Bom[2])
        nHasUtf8BOM = 3;

    BOOL bWideCharOrMultyChar = TRUE; // TRUE = WCHAR text, FALSE = char text
    if(g_ForceFCPType == FCP_none) // no encoding forced: decide by BOM
    {
        if(nHasUnicodeBOM)
        {
            g_FCPType = FCP_Unicode;
            bWideCharOrMultyChar = TRUE;
        }
        else
        {
            g_FCPType = nHasUtf8BOM ? FCP_Utf8 : FCP_Multibyte;
            bWideCharOrMultyChar = FALSE;
        }
    }
    else // encoding forced from the command line
    {
        g_FCPType = g_ForceFCPType;
        if(g_ForceFCPType == FCP_Unicode)
        {
            bWideCharOrMultyChar = TRUE;
        }
        else if(g_ForceFCPType == FCP_Multibyte || g_ForceFCPType == FCP_Utf8)
        {
            bWideCharOrMultyChar = FALSE;
            if(g_ForceFCPType != FCP_Utf8) // multibyte text is treated as having no BOM
                nHasUtf8BOM = 0;
        }
    }

    if(bWideCharOrMultyChar)
    {
        WCHAR* pStrUnicode = (WCHAR*)(pBuf + nHasUnicodeBOM);
        if(nHasUnicodeBOM)
            fwrite(&bom, 1, sizeof(bom), fpOut);
        DoPreCompileCodeT<WCHAR, CStringW>(fpOut, pStrUnicode);
    }
    else
    {
        char* pStrUtf8OrAnsi = (char*)(pBuf + nHasUtf8BOM);
        if(nHasUtf8BOM)
            fwrite(&utf8Bom[0], 1, sizeof(utf8Bom), fpOut);
        DoPreCompileCodeT<char, CStringA>(fpOut, pStrUtf8OrAnsi);
    }

    fclose(fpOut);
    delete[] pBuf;
}
// Applies one command-line option (prefixes are matched case-insensitively):
//   /def:NAME              - pre-defines the macro NAME with empty code
//   /cp:ansi|utf8|unicode  - forces the encoding of the input file
// Any other text is ignored.
void parse_option(CString strOption)
{
    if(strOption.GetLength() > 5 && strOption.Left(5).CompareNoCase(L"/def:") == 0)
    {
        // Pre-define a macro.
        CString strMacroName = strOption.Mid(5);
        AddMacro(strMacroName, L"");
        return;
    }
    if(strOption.GetLength() > 4 && strOption.Left(4).CompareNoCase(L"/cp:") == 0)
    {
        // Force the character set.
        CString strCP = strOption.Mid(4);
        if(strCP.CompareNoCase(L"ansi") == 0)
        {
            g_ForceFCPType = FCP_Multibyte;
        }
        else if(strCP.CompareNoCase(L"utf8") == 0)
        {
            g_ForceFCPType = FCP_Utf8;
        }
        else if(strCP.CompareNoCase(L"unicode") == 0)
        {
            g_ForceFCPType = FCP_Unicode;
        }
        else
        {
            // wprintf is variadic: pass the character pointer, not the CString object.
            wprintf(L"unknown code page setting %s, Use defalut\n", strCP.GetString());
        }
    }
}
// Sets g_CodeType from the extension of the input file name (case-insensitive).
// The name must be longer than the extension itself; unknown extensions are
// treated as Lua.
void GetCodeType(CString strFileInput)
{
    // Local helper: true when path is longer than ext and ends with it.
    struct Suffix
    {
        static bool Is(const CString& path, const wchar_t* ext, int extLen)
        {
            return path.GetLength() > extLen && path.Right(extLen).CompareNoCase(ext) == 0;
        }
    };

    if(Suffix::Is(strFileInput, L".lua", 4))
    {
        g_CodeType = CT_Lua;
    }
    else if(Suffix::Is(strFileInput, L".cpp", 4) || Suffix::Is(strFileInput, L".c", 2))
    {
        g_CodeType = CT_Cpp;
    }
    else if(Suffix::Is(strFileInput, L".js", 3) || Suffix::Is(strFileInput, L".java", 5))
    {
        g_CodeType = CT_Java;
    }
    else if(Suffix::Is(strFileInput, L".vbs", 4) || Suffix::Is(strFileInput, L".bas", 4))
    {
        g_CodeType = CT_Vb;
    }
    else if(Suffix::Is(strFileInput, L".pas", 4))
    {
        g_CodeType = CT_Delphi;
    }
    else
    {
        g_CodeType = CT_Lua;
    }
}
// Entry point. Usage: app x.code out.code "/def:Macro" ...
// argv[1] is the input file, argv[2] the output file, and every further
// argument is handed to parse_option(). With fewer than two file arguments the
// usage text is printed. Always returns 0.
int _tmain(int argc, _TCHAR* argv[])
{
    if(argc < 3)
    {
        printf("no input file or out file ,usage : app x.code out.code \"/def:Macro\" ... \n");
        printf(" options: \n");
        printf(" (1)\"/def:MACRO\" = pre-Define a Macro Named MACRO\n");
        printf(" (2)\"/cp:ansi/utf8/unicode\" = Set Code\'s CodePage\n");
        return 0;
    }

    CString strFileInput = argv[1];
    GetCodeType(strFileInput);
    CString strFileOutput = argv[2];
    // Everything after the two file names is an option.
    int nArg = 3;
    while(nArg < argc)
    {
        parse_option(argv[nArg]);
        nArg++;
    }
    DoPreCompileCode(strFileInput, strFileOutput);
    return 0;
}