[讨论]关于C#屏幕取词的技术实现

leinchen 2006-01-28 06:35:54
引言
  四通利方和金山词霸的用户都曾见识过屏幕抓字技术,鼠标指哪就翻译哪个单词,这个技术看似简单,其实在WINDOWS系统中实现却是非常复杂和有趣的。经过半年多的艰辛探索,笔者终于破解了其中的秘密,并在今天决定公开它,这个人人都曾见过但是却鲜有人知的秘密,这个只被几家软件公司垄断、从未在公开的报刊资料披露过只言片语的秘密!回想这半年多的探索,其中浸润了多少笔者的苦闷与欢乐,绝望与兴奋,挫折与收获,现在都终于有了结果:将屏幕抓字技术的秘密公开,献给孜孜不倦辛勤工作的程序员们。如果这样做能为国产软件事业的发展效微薄之力,对笔者来说,也是一桩快事!
引:<屏幕抓字技术揭密>-深入WINDOWS内部探险手记-郑州 马飞涛

  在这里提到的相关技术到底需要些什么呢?在GOOGLE上的查询让我至少知道了它需要两个条件:
  1.了解API
  2.了解计算机存储方式.

  在这里想和大家一起讨论这方面的技术实现与优化。我将把我在实践中的所得全部公布,希望大家积极参与讨论。



...全文
1186 点赞 收藏 25
写回复
25 条回复
切换为时间正序
当前发帖距今超过3年,不再开放新的回复
发表回复
Lovefoxpro 2006-06-14
关注一下。另外上面的delphi代码怎么那么乱,没有经过IDE整理么?
回复
Radar2006 2006-04-28
api不熟,先顶一下
回复
overdream 2006-04-28
http://topic.csdn.net/t/20050613/11/4078197.html
回复
overdream 2006-04-28
' Module file:
Option Explicit

' Edit-control message identifiers passed as wMsg to SendMessage.
' EM_CHARFROMPOS: sent with a packed point; the reply is split by the form code
'                 into a character index (low word) and a line index (high word).
Public Const EM_CHARFROMPOS = &HD7
' EM_GETLINECOUNT: declared but not used by the code shown in this listing.
Public Const EM_GETLINECOUNT = &HBA
' EM_GETLINE: used by GetLine to copy one line into a caller-supplied buffer.
Public Const EM_GETLINE = &HC4
' EM_LINEINDEX: used by GetLine to turn a line number into a character index.
Public Const EM_LINEINDEX = &HBB
' EM_LINELENGTH: used by GetLine to size the receiving buffer.
Public Const EM_LINELENGTH = &HC1
' EM_SETSEL: used by GetWord to select the word it found.
Public Const EM_SETSEL = &HB1

' Raw memory copy; GetLine uses it to write a 2-byte length into the start of a string buffer.
Declare Sub RtlMoveMemory Lib "KERNEL32" (lpvDest As Any, lpvSource As Any, ByVal cbCopy As Long)
' ANSI SendMessage. lParam is "As Any", so callers must write ByVal explicitly when
' passing a number or a string buffer rather than the address of a variable.
Declare Function SendMessage Lib "user32" Alias "SendMessageA" (ByVal hwnd As Long, ByVal wMsg As Long, ByVal wParam As Long, lParam As Any) As Long


'给你整个frm文件:
VERSION 5.00
Begin VB.Form Form1
Caption = "读取鼠标所在位置的单词"
ClientHeight = 2955
ClientLeft = 60
ClientTop = 345
ClientWidth = 5160
LinkTopic = "Form1"
ScaleHeight = 197
ScaleMode = 3 'Pixel
ScaleWidth = 344
StartUpPosition = 3 '窗口缺省
Begin VB.TextBox Text1
BeginProperty Font
Name = "MS Serif"
Size = 12
Charset = 0
Weight = 400
Underline = 0 'False
Italic = 0 'False
Strikethrough = 0 'False
EndProperty
Height = 2535
Left = 240
MultiLine = -1 'True
ScrollBars = 2 'Vertical
TabIndex = 0
Text = "GetWord2.frx":0000
Top = 240
Width = 4695
End
End
Attribute VB_Name = "Form1"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
Option Explicit

' Click handler for Text1: shows a message box with the text of the clicked line
' and the word under the mouse pointer.
'
' X, Y: pointer position supplied by VB. They are divided by the screen's
'       twips-per-pixel factors, i.e. treated as twips.
'       NOTE(review): the form declares ScaleMode = 3 (Pixel); confirm which
'       unit the TextBox mouse events actually deliver.
Private Sub Text1_MouseDown(Button As Integer, Shift As Integer, X As Single, Y As Single)
    Dim pos As Long, lc As Long, charIdx As Long
    Dim Line As Integer, CharPos As Integer

    ' Pack the point for EM_CHARFROMPOS: x in the low word, y in the high word.
    pos = X / Screen.TwipsPerPixelX + Y / Screen.TwipsPerPixelY * 65536
    lc = SendMessage(Text1.hwnd, EM_CHARFROMPOS, 0, ByVal pos)

    ' A negative reply (the control answers -1 when no character can be
    ' resolved for the point) used to become CharPos = -1 and raise
    ' "subscript out of range" inside GetWord. A low word above 32767 would
    ' overflow the Integer that GetWord expects. Nothing to show in either case.
    charIdx = lc And &HFFFF&
    If lc < 0 Or charIdx > 32767 Then Exit Sub

    Line = lc \ 65536       ' high word: zero-based line index
    CharPos = charIdx       ' low word: zero-based character index

    MsgBox " = " & GetLine(Text1, Line) & vbCrLf & "单词= " & GetWord(Text1, CharPos)
End Sub

' Returns the word that surrounds byte offset pos in txt, and selects it in the
' control. Returns "" (and leaves the selection alone) when there is no word.
'
' txt: text box whose Text is scanned. The text is converted to ANSI bytes, so
'      pos is interpreted as a byte offset into that ANSI form.
' pos: zero-based offset of the character under the pointer.
'
' A word is the longest run of bytes around pos that contains no delimiter as
' defined by IsDelimiter.
Function GetWord(txt As TextBox, pos As Integer) As String
    ' Long locals: the original Integer locals overflowed on UBound() as soon as
    ' the text was longer than 32767 bytes.
    Dim bArr() As Byte, bArr2() As Byte
    Dim pos1 As Long, pos2 As Long, i As Long

    GetWord = ""
    bArr = StrConv(txt.Text, vbFromUnicode)

    ' Reject offsets that do not lie inside (or directly after) the text;
    ' indexing bArr with them would raise "subscript out of range".
    If pos < 0 Or pos > UBound(bArr) + 1 Then Exit Function

    pos1 = 0: pos2 = UBound(bArr)

    ' Scan left for the delimiter that precedes the word.
    For i = pos - 1 To 0 Step -1
        If IsDelimiter(bArr(i)) Then
            pos1 = i + 1
            Exit For
        End If
    Next

    ' Scan right for the delimiter that follows the word.
    For i = pos To UBound(bArr)
        If IsDelimiter(bArr(i)) Then
            pos2 = i - 1
            Exit For
        End If
    Next

    ' ">=" rather than ">": a one-byte word (pos1 = pos2) is still a word.
    ' pos2 < pos1 only when the range is empty (empty text, or the pointer is
    ' on a delimiter that directly follows another delimiter).
    If pos2 >= pos1 Then
        ReDim bArr2(pos2 - pos1)
        For i = pos1 To pos2
            bArr2(i - pos1) = bArr(i)
        Next

        GetWord = StrConv(bArr2, vbUnicode)

        ' Highlight the word: selection runs from pos1 up to, not including, pos2 + 1.
        SendMessage txt.hwnd, EM_SETSEL, pos1, ByVal CLng(pos2 + 1)
    End If
End Function

' Tells whether the given ANSI byte is a word separator.
' Separators are: space, comma, full stop, question mark, CR and LF.
Function IsDelimiter(ByVal Char As Byte) As Boolean
    Dim separators As String

    separators = " ,.?" & vbCr & vbLf
    ' Exact (binary) match of the single character against the separator set.
    IsDelimiter = (InStr(1, separators, Chr(Char), vbBinaryCompare) > 0)
End Function

' Returns the text of one line of a multi-line text box, or "" when the line
' does not exist or is empty.
'
' txt:  text box to read from.
' Line: zero-based line index.
Function GetLine(txt As TextBox, ByVal Line As Integer) As String
    Dim S As String, Length As Integer, pos As Long

    GetLine = ""

    ' Character index of the first character of the line; negative when the
    ' control cannot resolve the line number.
    pos = SendMessage(txt.hwnd, EM_LINEINDEX, Line, ByVal 0&)
    If pos < 0 Then Exit Function

    Length = SendMessage(txt.hwnd, EM_LINELENGTH, pos, ByVal 0&)
    ' An empty line yields a zero-length buffer; writing the 2-byte size
    ' prefix into it below would run past the end of the string.
    If Length < 1 Then Exit Function

    S = String(Length, Chr(0))
    ' EM_GETLINE expects the buffer capacity in the first 2 bytes of the buffer.
    RtlMoveMemory ByVal S, Length, 2

    ' Query the control that was passed in (the original always used Text1,
    ' ignoring the txt parameter).
    If SendMessage(txt.hwnd, EM_GETLINE, Line, ByVal S) > 0 Then
        GetLine = S
    End If
End Function

' Hover handler for Text1: puts the word under the mouse pointer into the
' control's tooltip (GetWord also selects that word in the control).
'
' X, Y: pointer position supplied by VB. They are divided by the screen's
'       twips-per-pixel factors, i.e. treated as twips.
'       NOTE(review): the form declares ScaleMode = 3 (Pixel); confirm which
'       unit the TextBox mouse events actually deliver.
Private Sub Text1_MouseMove(Button As Integer, Shift As Integer, X As Single, Y As Single)
    Dim pos As Long, lc As Long, charIdx As Long
    Dim CharPos As Integer

    ' Pack the point for EM_CHARFROMPOS: x in the low word, y in the high word.
    pos = X / Screen.TwipsPerPixelX + Y / Screen.TwipsPerPixelY * 65536
    lc = SendMessage(Text1.hwnd, EM_CHARFROMPOS, 0, ByVal pos)

    ' A negative reply (the control answers -1 when no character can be
    ' resolved for the point) used to become CharPos = -1 and raise
    ' "subscript out of range" inside GetWord. A low word above 32767 would
    ' overflow the Integer that GetWord expects. Show no tooltip in either case.
    charIdx = lc And &HFFFF&
    If lc < 0 Or charIdx > 32767 Then
        Text1.ToolTipText = ""
        Exit Sub
    End If

    CharPos = charIdx       ' low word: zero-based character index
    Text1.ToolTipText = GetWord(Text1, CharPos)
End Sub

回复
overdream 2006-04-28
晕,头晕中
回复
leinchen 2006-02-06
小弟先谢谢各位了
回复
leinchen 2006-02-06
还有最后一段
NHD_GetWordTimerProc(hwnd: HWND; msg: word; idTimer: word; dwTime: DWORD); stdcall;
// (The "procedure" keyword of this header is the last word of the preceding
//  part of the listing; this part starts at the routine name.)
// Timer callback armed by NHD_BeginGetWord. If the capture has not already been
// completed by the "get word ok" message, it ends the capture, fetches the text,
// then stops the timer and notifies the main window.
begin
  // may already have been finished by the GetWord message
  if g_bInGetWord then
  begin
    g_bInGetWord := FALSE;
    // unhook TextOut
    BL_SetFlag32(GETWORD_DISABLE, 0, 0, 0);
    BL_GetText32(g_TextBuffer, NHD_MAX_TEXTLEN, @G_Rect);
  end;
  KillTimer(g_hFlyWin, NHD_GETWORD_TIMER);
  g_nGWTimerID := 0;
  PostMessage(g_hNHMainWin, NHD_WM_GETWORD_OK, 0, 0);
end;

// Starts capturing the word at screen point ptMousePos: positions the fly window
// over the point (over the whole line for known edit-like window classes),
// enables the capture hook, moves the fly window away again and arms the
// timeout timer.
procedure NHD_BeginGetWord(ptMousePos: TPOINT);
var
  szAppClassName : array [0..NHD_CLASSNAME_LEN] of char;
  hAppWin        : LongWord;
  nFlyWinLeft    : integer;
  nFlyWinWidth   : integer;
  rcAppWin       : TRECT;
  cmpstr         : string;
begin
  // get window from mouse point
  hAppWin := WindowFromPoint(ptMousePos);

  // check if the app window is EDIT, if it is, redraw whole line
  szAppClassName[0] := #0;  // stay a valid empty string if GetClassName fails
  GetClassName(hAppWin, szAppClassName, NHD_CLASSNAME_LEN);
  (*DbgPrintf("hAppWin: %x\n", hAppWin);
    DbgPrintf("ClassName: %s\n", szAppClassName);*)
  cmpstr := trim(strpas(szAppClassName));
  Frm_main.Edit2.text := cmpstr;

  if ((cmpstr = 'Edit') or                      //NotePad
      (cmpstr = 'Internet Explorer_Server') or  //IE4.0
      (cmpstr = 'RichEdit') or                  //
      (cmpstr = 'RichEdit20A') or               //WordPad
      (cmpstr = 'RichEdit20W') or               //WordPad
      (cmpstr = 'HTML_Internet Explorer') or    //IE3.0
      (cmpstr = 'ThunderTextBox') or            //VB Edit
      (cmpstr = 'ThunderRT5TextBox') or         //VB Edit
      (cmpstr = 'ThunderRT6TextBox') or         //VB Edit
      (cmpstr = 'EXCEL<') or                    //Excel 2000
      (cmpstr = 'EXCEL7') or                    //Excel 2000
      (cmpstr = 'EXCEL6') or                    //Excel 2000
      (cmpstr = 'ConsoleWindowClass') or        //NT V86
      (cmpstr = 'tty') or
      (cmpstr = 'ttyGrab'))                     //Word97
  then
  begin
    GetWindowRect(hAppWin, rcAppWin);
    nFlyWinLeft := rcAppWin.left - 4;
    nFlyWinWidth := rcAppWin.right - rcAppWin.left - 8;

    // do not repaint the whole line if it is too long
    if (ptMousePos.x - nFlyWinLeft) > 200 then
    begin
      nFlyWinLeft := ptMousePos.x - 200;
    end;
    //DbgPrintf("!!!!tty window");
  end
  else
  begin
    nFlyWinLeft := ptMousePos.x;
    nFlyWinWidth := NHD_FLYWIN_WIDTH;
  end;

  // note: move the flywin to cursor pos "x - 1" to avoid the mouse shape
  // changing between ARROW and EDIT in the edit area;
  // use SetWindowPos instead of MoveWindow, for MoveWindow can not make a
  // menu item redraw.
  SetWindowPos(g_hFlyWin, HWND_TOPMOST,
               nFlyWinLeft, ptMousePos.y - 1,
               nFlyWinWidth, NHD_FLYWIN_HEIGHT,
               SWP_NOACTIVATE or SWP_NOREDRAW);

  // set flag to avoid re-entry
  g_bInGetWord := TRUE;

  // hook TextOut
  BL_SetFlag32(GETWORD_ENABLE, g_hFlyWin, ptMousePos.x, ptMousePos.y);

  MoveWindow(g_hFlyWin, -1, -1, NHD_FLYWIN_WIDTH, NHD_FLYWIN_HEIGHT, TRUE);

  g_nGWTimerID := SetTimer(g_hFlyWin, NHD_GETWORD_TIMER, NHD_GW_WAITING_TIME,
                           @NHD_GetWordTimerProc);
end;

// Copies the captured text (g_TextBuffer) into szBuffer as a zero-terminated
// string. Returns false, leaving szBuffer untouched, when szBuffer is nil or
// cannot hold the text plus its terminator.
function NHD_CopyWordsTo(szBuffer: pchar; nBufferSize: Integer): Boolean;
var
  nLen : integer;
begin
  result := false;
  if (szBuffer = nil) or (nBufferSize <= 0) then
    exit;

  // Length of the captured text, not the capacity of the buffer:
  // sizeof(g_TextBuffer) is always 1025, which rejected every caller buffer
  // smaller than 1026 bytes and copied stale bytes after the terminator.
  nLen := StrLen(g_TextBuffer);
  if (nLen + 1) > nBufferSize then
    exit;

  ZeroMemory(szBuffer, nBufferSize);  // also supplies the terminating #0
  CopyMemory(szBuffer, @g_TextBuffer, nLen);
  result := true;
end;

function NHD_ExitGetWords(): boolean;
begin
  //free
回复
wwqna 2006-02-06
用C#来做,那速度能跟上吗?
回复
lidong6 2006-02-06
顶一个先
回复
leinchen 2006-02-06
bInGetWord : boolean;
  currpoint  : Tpoint;
  G_Rect     : TRECT;

implementation

uses unit1;

// Registers the fly-window class and creates the topmost tool popup window
// used for word capture. Returns the window handle, or 0 when hInst is 0.
function NHD_CreateWindow(hInst: Integer): HWND;
var
  hwnd : LongWord;
  wc   : TWndClassA;
begin
  if hInst = 0 then
  begin
    result := 0;
    exit;
  end;

  // wc is a stack local: clear it so that fields not assigned below
  // (lpszMenuName in particular) are not handed to RegisterClass as garbage.
  FillChar(wc, SizeOf(wc), 0);
  with wc do
  begin
    // NOTE(review): WS_EX_TOPMOST is an extended window style, not a class
    // style; value kept unchanged from the original - confirm the intent.
    style := WS_EX_TOPMOST;
    lpfnWndProc := @NHD_FlyWndProc; (* message handler *)
    hInstance := hInst;
    hbrBackground := color_btnface + 1;
    lpszClassname := 'NHD_FLYWIN_DEMO';
    hicon := 0;
    hCursor := 0;
    cbClsExtra := 0;
    cbWndExtra := 0;
  end;
  RegisterClass(wc);

  hwnd := CreateWindowEx(WS_EX_TOPMOST or WS_EX_TOOLWINDOW,
                         'NHD_FLYWIN_DEMO',
                         'NHD_FlyWindow_Demo',
                         WS_POPUP or WS_VISIBLE,
                         NHD_WIN_INITPOSX,
                         NHD_WIN_INITPOSY,
                         NHD_FLYWIN_WIDTH,
                         NHD_FLYWIN_HEIGHT,
                         0,
                         0,
                         hInst,
                         nil);
  result := hwnd;
end;

// Window procedure of the fly window. On the "get word ok" message, while a
// capture is in progress, it stops the timer, disables the capture hook,
// fetches the text when wParam is 0 and notifies the main window. Every other
// message goes to DefWindowProc.
function NHD_FlyWndProc(hWnd, Msg, wParam, lParam: Integer): Integer; stdcall;
begin
  // unhook TextOut when the message from getword has been received
  if msg = g_WM_GetWordOk then
  begin
    if g_bInGetWord then
    begin
      g_bInGetWord := FALSE;
      KillTimer(g_hFlyWin, NHD_GETWORD_TIMER);
      g_nGWTimerID := 0;
      BL_SetFlag32(GETWORD_DISABLE, 0, 0, 0);
      if wParam = 0 then
      begin
        BL_GetText32(@g_TextBuffer, sizeof(g_TextBuffer), @G_Rect);
      end;
      PostMessage(g_hNHMainWin, NHD_WM_GETWORD_OK, 0, 0);
      result := 0;
      exit;
    end;
  end;
  result := DefWindowProc(hWnd, msg, wParam, lParam);
end;

// (keyword of the next routine; its header continues in the following part of the listing)
procedure
回复
leinchen 2006-02-06
private
  { Private declarations }
  procedure GetMousePosHwndAndClassName(Sender : TPoint);

// Shows the handle and the class name of the window under the given screen
// point in Label1 and Label2.
procedure TForm1.GetMousePosHwndAndClassName(Sender: TPoint);
var
  hWnd  : THandle;
  aName : array [0..255] of char;
begin
  hWnd := WindowFromPoint(Sender);
  Label1.Caption := 'Handle : ' + IntToStr(hWnd);
  // GetClassName returns a character count; test it as a number instead of
  // type-casting it to Boolean.
  if GetClassName(hWnd, aName, 256) > 0 then
    Label2.Caption := 'ClassName : ' + string(aName)
  else
    Label2.Caption := 'ClassName : not found';
end;

// Keeps the form on top and sets Timer1 to a 50 ms interval.
procedure TForm1.FormCreate(Sender: TObject);
begin
  Form1.FormStyle := fsStayOnTop;
  Timer1.Interval := 50;
end;

// Timer tick: look up the window under the current cursor position.
procedure TForm1.Timer1Timer(Sender: TObject);
var
  rPos : TPoint;
begin
  if GetCursorPos(rPos) then
    GetMousePosHwndAndClassName(rPos);
end;

来自:Alex_Y, 时间:2004-5-22 14:08:29, ID:2622204
要建个DLL实现钩子

unit GetWord;

interface

uses
  SysUtils, windows, messages;

const NHD_GETWORD_TIMER = 2;
const NHD_MAX_TEXTLEN = 1024;
const NHD_WIN_INITPOSX = -1;
const NHD_WIN_INITPOSY = -1;
const NHD_FLYWIN_WIDTH = 1;
const NHD_FLYWIN_HEIGHT = 1;
const NHD_CLASSNAME_LEN = 256;
const NHD_GW_WAITING_TIME = 200; //get word waiting time;

(* Function type that configures screen word capture *)
type
  TBL_SetFlag32 = function (nFlag      : word;      // whether to capture words
                            hNotifyWnd : HWND;      // window notified once a word has been captured
                            MouseX     : integer;   // X coordinate
                            MouseY     : integer    // Y coordinate
                           ): DWORD; stdcall;
(*
  Purpose:
    Starts or stops word capture.
  Parameters:
    nFlag
      [in] One of the following values:
        GETWORD_ENABLE:  start capturing. Set this flag before repainting the
                         area of the word to capture. nhw32.dll captures words
                         by repainting the word area and intercepting the
                         parameters of Windows API functions such as TextOutA,
                         TextOutW, ExtTextOutA and ExtTextOutW.
        GETWORD_DISABLE: stop capturing.
    hNotifyWnd
      [in] Handle of the notification window. When a word has been captured,
      the registered message GWMSG_GETWORDOK is sent to this window.
    MouseX
      [in] X coordinate of the capture point.
    MouseY
      [in] Y coordinate of the capture point.
  Return value:
    Can be ignored.
*)

type
  TLPRECT = ^TRECT; (* declare the pointer type first *)

type
  TBL_GetText32 = function (lpszCurWord : pchar;
                            nBufferSize : integer;
                            lpWordRect  : TLPRECT
                           ): DWORD; stdcall;
(*
  Purpose:
    Fetches the captured word text from the internal buffer. For English text
    the function returns at most three space-separated English words within
    one line, and stops at a space, a non-English letter, or any punctuation
    mark other than '-'. For Chinese text it returns at most one line of
    Chinese characters and stops at non-Chinese characters such as English
    letters or punctuation. It cannot return English and Chinese characters
    at the same time.
  Parameters:
    lpszCurWord
      [in] Pointer to the destination buffer.
    nBufferSize
      [in] Size of the destination buffer.
    lpWordRect
      [out] Pointer to a RECT structure that receives the rectangle
      containing the captured word.
  Return value:
    Position of the cursor within the returned words.
*)

type
  TSetNHW32 = function (): boolean; stdcall;
(*
  Purpose:
    Initialization function for Win NT/2000. Normally called once when the
    program starts.
  Parameters:
    None.
  Return value:
    TRUE on success, FALSE on failure.
*)

type
  TResetNHW32 = function (): boolean; stdcall;
(*
  Purpose:
    De-initialization function for Win NT/2000. Normally called when the
    program ends.
  Parameters:
    None.
  Return value:
    TRUE on success, FALSE on failure.
*)

function NHD_FlyWndProc(hWnd, Msg, wParam, lParam: Integer): Integer; stdcall;
function NHD_CreateWindow(hInst: Integer): HWND;
procedure NHD_BeginGetWord(ptMousePos: TPOINT);
function NHD_ExitGetWords(): boolean;
function NHD_DestroyWindow(): boolean;
procedure NHD_FreeLoadedLib();
function NHD_InitGetWords(hInst: THANDLE; hwnd: HWND): HWND;
function NHD_LoadGetWordLib(): boolean;

var
  WinClass       : TWndClassA;
  Inst           : Integer;
  Msg            : TMsg;
  g_TextBuffer   : array[0..1024] of char;
  g_hFlyWin      : HWND;
  g_nGWTimerID   : word;
  g_hGetWordInst : Integer;
  BL_SetFlag32   : TBL_SetFlag32;
  BL_GetText32   : TBL_GetText32;
  SetNHW32       : TSetNHW32;
  ResetNHW32     : TResetNHW32;
  g_hNHMainWin   : HWND;
  g_WM_GetWordOk : WORD;
  // (the next identifier is cut here and continues in the following part of the listing)
  g_
回复
leinchen 2006-02-06
以下是DELPHI源码,不知道有没有高人可以帮忙翻译成C#:
回复
fellowcheng 2006-02-03
关注学习
回复
leinchen 2006-02-03
能不能把例子链接放到这里给大家参考?
回复
mengyao 2006-02-03
想讨论,但没有研究过
回复
ChrisAK 2006-02-02
OCR,图像识别(字符).Office2003就有这个功能.
回复
boyzhang 2006-02-02
对了,忘了说,如果不调用Win32API的话,估计做起来会很麻烦.

网上好像有一个老外做的VB6+Win32 API的例子.
回复
boyzhang 2006-02-02
截获TextOut一类的API,根据鼠标当前坐标判断屏幕所在区域的字符,

然后自己做一个分词的程序(比如根据空格分词).
回复
pyuan 2006-02-02
ChrisAK(Chris)

偶只知道大概的过程.
我开始以为是获得句柄后GetWindowText(估计这样认为的人还不少,偶遇到好多了.)真正的方法是这样的:通过拦截DrawText,TextOut等GDI函数的调用.来获得输出的内容.当你把鼠标移动到窗体上时.程序将向窗体发送消息,强制目标程序重绘该窗体,然后在拦截函数中获得字符.

虽然说起好像很简单,不过实现起来却很复杂.特别是API的拦截.所以用C#做难度是不是太大了点??建议LZ结合C++来实现比较好.




我也学了一段时间的C#,就是因为学C#,所以对WIN API的使用真的是一点也不清楚。在C#里都是强调要使用自己的安全的类库,你所说的这些除了用C#调用API,C#自己的类库一点办法也没有……说以后WIN FX要取代WIN32 API,可是它能实现这么多底层的功能吗?表示怀疑啊……

感觉这种东西有必要用C#做不。。。
回复
leinchen 2006-02-02
请问OCR是什么东西 。。这个我好象没有接触过。。
还有一个问题,。既然是 图片。怎么转换成字符。
回复
加载更多回复
相关推荐
发帖
C#
创建于2007-09-28

10.6w+

社区成员

.NET技术 C#
申请成为版主
帖子事件
创建了帖子
2006-01-28 06:35
社区公告

让您成为最强悍的C#开发者