This project combines cohesion (凝固度), information entropy, n-gram statistics, the AutoPhraseX algorithm, and a Flask backend into one integrated service.

First, a look at the project's file layout:

Note: the HTML pages must live in the templates folder.

The files 2w and 1234.txt are test files used for keyword extraction.

(screenshot: the project's file layout)
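The layout the code below assumes looks roughly like this (a sketch; app.py as the name of the main program is an assumption, everything else is taken from the code and templates that follow):

project/
├── app.py            # the Flask main program shown below (name assumed)
├── templates/        # render_template() pages must live here
│   ├── index1.html
│   ├── index2.html
│   ├── index3.html
│   ├── index4.html
│   ├── index4_5.html
│   └── index5.html
├── save_file/        # uploaded corpora and the generated 关键词_*.txt result files
├── 2w                # test file for keyword extraction
└── 1234.txt          # test file for keyword extraction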

First, the main program:

import os
import time
import json
import re
import threading
from string import Template
from collections import defaultdict, Counter

import requests
import pandas as pd
import numpy as np
from numpy import log, min
from tqdm import tqdm
from flask import Flask, request, send_file, render_template
from werkzeug.utils import secure_filename
from autophrasex import *

app = Flask(__name__)
pwd = os.path.dirname(__file__)
print(pwd)

# Mining based on AutoPhrase
autophrase = AutoPhrase(
    reader=DefaultCorpusReader(tokenizer=JiebaTokenizer()),
    selector=DefaultPhraseSelector(),
    extractors=[NgramsExtractor(N=4), IDFExtractor(), EntropyExtractor()])

# Mining based on information entropy
def xin_xi_shang(file):
    f = open(file, 'r')  # open the corpus file
    s = f.read()         # read it as one string
    # punctuation characters to strip out
    drop_dict = [u',', u'\n', u'。', u'、', u':', u'(', u')', u'[', u']', u'.', u',', u' ', u'\u3000', u'”', u'“', u'?', u'?',
                 u'!', u'‘', u'’', u'…']
    for i in drop_dict:  # remove the punctuation
        s = s.replace(i, '')
    # a small regex dictionary keyed by candidate word length, for convenience
    myre = {2: '(..)', 3: '(...)', 4: '(....)', 5: '(.....)', 6: '(......)', 7: '(.......)'}
    min_count = 10    # minimum frequency for a candidate word
    min_support = 30  # minimum support; 1 would mean a purely random combination
    min_s = 3         # minimum boundary entropy; the larger, the more likely the candidate is a standalone word
    max_sep = 4       # maximum length (in characters) of a candidate word
    t = []            # holds the frequency tables
    t.append(pd.Series(list(s)).value_counts())  # per-character counts
    tsum = t[0].sum()                            # total number of characters
    rt = []           # holds the surviving candidates
    for m in range(2, max_sep + 1):
        print(u'Generating %s-character candidate words...' % m)
        t.append([])
        for i in range(m):  # enumerate all possible m-character substrings
            t[m - 1] = t[m - 1] + re.findall(myre[m], s[i:])
        t[m - 1] = pd.Series(t[m - 1]).value_counts()  # per-candidate counts
        t[m - 1] = t[m - 1][t[m - 1] > min_count]      # minimum-frequency filter
        tt = t[m - 1][:]
        for k in range(m - 1):
            qq = np.array(list(map(lambda ms: tsum * t[m - 1][ms] / t[m - 2 - k][ms[:m - 1 - k]] / t[k][ms[m - 1 - k:]],
                                   tt.index))) > min_support  # minimum-support (cohesion) filter
            tt = tt[qq]
        rt.append(tt.index)

    def cal_S(sl):  # boundary-entropy helper
        return -((sl / sl.sum()).apply(log) * sl / sl.sum()).sum()

    for i in range(2, max_sep + 1):
        print(u'Running the boundary-entropy filter on %s-character words (%s candidates)...' % (i, len(rt[i - 2])))
        pp = []  # collects the left and right neighbours of every candidate
        for j in range(i + 2):
            pp = pp + re.findall('(.)%s(.)' % myre[i], s[j:])
        pp = pd.DataFrame(pp).set_index(1).sort_index()  # sort first: this matters a lot, it speeds up the lookups
        index = np.sort(np.intersect1d(rt[i - 2], pp.index))  # intersect with the surviving candidates
        # the next two lines are the left-neighbour and right-neighbour entropy filters
        index = index[np.array(list(map(lambda s: cal_S(pd.Series(pp[0][s]).value_counts()), index))) > min_s]
        rt[i - 2] = index[np.array(list(map(lambda s: cal_S(pd.Series(pp[2][s]).value_counts()), index))) > min_s]

    for i in range(len(rt)):
        t[i + 1] = t[i + 1][rt[i]]
        t[i + 1].sort_index(ascending=True)

    words_sum = {}
    for i in t[1:]:
        for idx in range(len(i)):
            words_sum[i.index[idx]] = i[idx]
    words_sum = sorted(words_sum.items(), key=lambda x: x[1], reverse=True)
    key_sorce = {}
    words = []
    for word, score in words_sum:
        key_sorce[word] = score
        words.append(word)
    return key_sorce, words
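A minimal way to call the entropy-based miner on its own (a sketch; corpus.txt is an illustrative file name):

# Usage sketch for xin_xi_shang(); 'corpus.txt' is an illustrative plain-text corpus file.
key_sorce, words = xin_xi_shang('corpus.txt')
print(words[:20])           # the 20 highest-frequency candidates that survived the filters
print(key_sorce[words[0]])  # frequency of the top candidate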



# Mining via the DiDi-side keyword API
class OracleClient(object):
    def __init__(self, url, apikey):
        """
        We provide two functions as examples; please implement other
        functions yourself.
        :param url:
        :param apikey:
        """
        self.url = url
        self.headers = {
            'content-type': 'application/json',
            'apikey': apikey
        }
        self.keyword_url = url + '/api/v2/keyword'

    def keyword(self, text):
        data = {
            'text': text
        }
        resp = requests.post(self.keyword_url, data=json.dumps(data), headers=self.headers)
        return resp.json()


headers = ''
client = OracleClient('', headers)


def key_word(text_list):
    """
    Keyword request wrapper.
    text_list: a list of sentences.
    Writes into the global dict keyword_sum = {}.
    """
    for line in text_list:
        try:
            res = client.keyword(line)
            print(res)
            if len(res) > 0 and len(res['data']['keyword']) > 0:  # guard against empty responses
                key_sum = len(res['data']['keyword'])  # number of keywords returned
                key_dist = res['data']['keyword']
                for i in key_dist:
                    if i['word'] not in list(keyword_sum.keys()):
                        keyword_sum[i['word']] = 1                           # first occurrence: initialise to 1
                    else:
                        keyword_sum[i['word']] = keyword_sum[i['word']] + 1  # later occurrences: increment
        except:
            pass
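From the way key_word indexes the response, the API is assumed to return JSON roughly shaped like the dict below; the field values are invented purely for illustration.

# Assumed response shape, inferred from res['data']['keyword'][i]['word'] above; values are illustrative only.
example_response = {
    "data": {
        "keyword": [
            {"word": "退款"},
            {"word": "司机"},
        ]
    }
}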

def list_of_groups(list_info, per_list_len):
    """Split a list into sub-lists of equal length."""
    list_of_group = zip(*(iter(list_info),) * per_list_len)
    end_list = [list(i) for i in list_of_group]  # i is a tuple
    count = len(list_info) % per_list_len
    end_list.append(list_info[-count:]) if count != 0 else end_list
    return end_list
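A quick check of the chunking behaviour (the numbers are arbitrary):

# list_of_groups() splits a list into fixed-size chunks, with a shorter tail chunk for any remainder.
print(list_of_groups([1, 2, 3, 4, 5, 6, 7], 3))  # -> [[1, 2, 3], [4, 5, 6], [7]]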


def Multi_threading(file, threading_sum):
    """
    file: path of the text file, one sentence per line.
    threading_sum: number of threads to start.
    """
    global keyword_sum
    keyword_sum = {}  # a global dict so that all threads write their results into one place
    with open(file, "r") as f:
        lines = f.readlines()
    if len(lines) < 500:
        threading_sum = 1
    end_lists = list_of_groups(lines, int(len(lines) / threading_sum))
    # print(len(end_lists))
    ts = [threading.Thread(target=key_word, args=(i,)) for i in end_lists]
    [t.start() for t in ts]
    [t.join() for t in ts]
    print("keyword_sum", keyword_sum)
    return keyword_sum



# Mining based on cohesion
class ning_gu_du(object):
    def __init__(self, txt_path, min_count=25, token_length=4, min_proba={2: 5, 3: 25, 4: 125}):
        self.txt_path = txt_path
        self.min_count = min_count
        self.token_length = token_length
        self.min_proba = min_proba
        self.read_text()
        self.statistic_ngrams()
        self.filter_ngrams()
        self.sentences_cut()
        self.judge_exist()
        self.calculate_prob_score()

    def read_text(self):
        print("reading text!")
        with open(self.txt_path, encoding='utf-8') as f:
            texts = f.readlines()
        texts = list(map(lambda x: x.strip(), texts))
        self.texts = list(
            map(lambda x: re.sub('[\s+\.\!\/_,$%^*(+\"\')]+|[+——()?【】“”!,。?、~@#¥%……&*()0-9a-zA-Z]+', "", x), texts))
        # print(self.texts[0:2])

    def statistic_ngrams(self):  # rough counts of the 1..n grams
        print('Starting statistic ngrams!')
        ngrams = defaultdict(int)
        for txt in self.texts:
            for char_id in range(len(txt)):
                for step in range(1, self.token_length + 1):
                    if char_id + step <= len(txt):
                        ngrams[txt[char_id:char_id + step]] += 1
        self.ngrams = {k: v for k, v in ngrams.items() if v >= self.min_count}

    def calculate_prob(self, token):  # cohesion of 2-grams and longer
        self.total = sum([v for k, v in self.ngrams.items() if len(k) == 1])
        sorce_list = []
        if len(token) >= 2:
            score = min(
                [self.total * self.ngrams[token] / (self.ngrams[token[:i + 1]] * self.ngrams[token[i + 1:]]) for i in
                 range(len(token) - 1)])
            # print(score)
            if score > self.min_proba[len(token)]:
                return True
            else:
                return False

    def calculate_prob_score(self):  # cohesion of 2-grams and longer, returned as a list of scores
        sorce_list = []
        for token in self.ngrams:
            self.total = sum([v for k, v in self.ngrams.items() if len(k) == 1])
            if len(token) >= 2:
                score = min(
                    [self.total * self.ngrams[token] / (self.ngrams[token[:i + 1]] * self.ngrams[token[i + 1:]]) for i
                     in range(len(token) - 1)])
                # print(score)
                if score > self.min_proba[len(token)]:
                    sorce_list.append(score)
                else:
                    pass
        return sorce_list

    def filter_ngrams(self):  # drop candidates whose cohesion falls below the threshold
        key_sorce = []
        self.ngrams_ = set(token for token in self.ngrams if self.calculate_prob(token))
        self.sorce_list = self.calculate_prob_score()
        return self.ngrams_, self.sorce_list

    def cut_sentence(self, txt):
        mask = np.zeros(len(txt) - 1)  # labelling starts from the second character
        for char_id in range(len(txt) - 1):
            for step in range(2, self.token_length + 1):
                if txt[char_id:char_id + step] in self.ngrams_:
                    mask[char_id:char_id + step - 1] += 1
        sent_token = [txt[0]]
        for index in range(1, len(txt)):
            if mask[index - 1] > 0:
                sent_token[-1] += txt[index]
            else:
                sent_token.append(txt[index])
        return (txt, sent_token)

    def sentences_cut(self):
        self.sentences_tokens = []
        all_tokens = defaultdict(int)
        for txt in self.texts:
            if len(txt) > 2:
                for token in self.cut_sentence(txt)[1]:
                    all_tokens[token] += 1
                self.sentences_tokens.append(self.cut_sentence(txt))
        self.all_tokens = {k: v for k, v in all_tokens.items() if v >= self.min_count}

    def is_real(self, token):
        if len(token) >= 3:
            for i in range(3, self.token_length + 1):
                for j in range(len(token) - i + 1):
                    if token[j:j + i] not in self.ngrams_:
                        return False
            return True
        else:
            return True

    def judge_exist(self):
        self.pairs = []  # (sentence, tokens) pairs, for display
        for sent, token in self.sentences_tokens:
            real_token = []
            for tok in token:
                if self.is_real(tok) and len(tok) != 1:
                    real_token.append(tok)
            self.pairs.append((sent, real_token))

        self.new_word = {k: v for k, v in self.all_tokens.items() if self.is_real(k)}
        key_sorce = {}
        for word, sorce in self.new_word.items():
            if len(word) > 1:
                key_sorce[word] = sorce
        key_sorce = sorted(key_sorce.items(), key=lambda x: x[1], reverse=True)
        words = []
        for word, sorce in key_sorce:
            words.append(word)
        return key_sorce, words

    def statistic_token(self):  # count the discovered new words, grouped by length
        count = defaultdict(int)
        length = list(map(lambda x: len(x), self.new_word.keys()))
        for i in length:
            count[i] += 1
        # print("count of new words by string length:", count)
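The cohesion miner can also be run on its own; a minimal sketch, assuming a UTF-8 corpus file named corpus.txt (illustrative name):

# Usage sketch for the cohesion-based miner; 'corpus.txt' is an illustrative file name.
miner = ning_gu_du('corpus.txt', min_count=25, token_length=4)
key_sorce, words = miner.judge_exist()  # candidates sorted by corpus frequency, highest first
print(words[:20])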




def action_keyword(path, data_name):
    txt_path = path + data_name
    findtoken = ning_gu_du(txt_path)
    words_sum_nigudu, words_nigudu = findtoken.judge_exist()

    corpus_files = txt_path
    quality_phrase_files = '/Users/didi/Documents/AutoPhraseX-main/data/wiki_quality.txt'
    # start mining
    predictions = autophrase.mine(corpus_files=[corpus_files], quality_phrase_files=quality_phrase_files,
                                  callbacks=[LoggingCallback(), ConstantThresholdScheduler(), EarlyStopping(patience=2, min_delta=3)])
    words_autophrase = []
    word_sum_autophrase = {}
    for word, score in predictions:
        word = word.replace(' ', '')
        words_autophrase.append(word)
        word_sum_autophrase[word] = score

    keyword_sum = Multi_threading(txt_path, 100)  # multi-threaded API extraction
    keyword_sum = sorted(keyword_sum.items(), key=lambda x: x[1], reverse=True)
    words_key = []
    for word, sorce in keyword_sum:
        words_key.append(word)

    file = txt_path
    words_sum_xin_xi_shang, words_xin_xi_shang = xin_xi_shang(file)

    # four methods; take the top 1000 words by score from each
    # processing speed: roughly 100 sentences per second
    def Summary_4000(words_nigudu, words_xin_xi_shang, words_key, words_autophrase):
        if len(words_nigudu) > 1000:
            words_nigudu = words_nigudu[0:1000]
        if len(words_xin_xi_shang) > 1000:
            words_xin_xi_shang = words_xin_xi_shang[0:1000]
        if len(words_key) > 1000:
            words_key = words_key[0:1000]
        if len(words_autophrase) > 1000:
            words_autophrase = words_autophrase[0:1000]
        return words_nigudu + words_xin_xi_shang + words_key + words_autophrase

    result = Summary_4000(words_nigudu, words_xin_xi_shang, words_key, words_autophrase)
    print("Done,", len(result))
    # path = '/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file/'
    key_filename = path + "关键词_" + str(data_name)
    print("key_filename", key_filename)
    with open(key_filename, "a+") as f:
        for i in result:
            f.write(i)
            f.write("\n")

    return len(result)
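The /index5 route below is what normally calls this function; run standalone it would look roughly like this (the directory is the one hard-coded elsewhere in this file, the file name is illustrative):

# Standalone usage sketch of action_keyword(); this mirrors what the /index5 route does below.
save_dir = '/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file/'
n_words = action_keyword(save_dir, '20211207.txt')  # writes 关键词_20211207.txt into save_dir
print(n_words)                                       # number of extracted words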



# upload directory and allowed file-name extensions
UPLOAD_FOLDER = os.path.join(pwd, 'save_file')
ALLOWED_EXTENSIONS = {'txt'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

HOST = "172.25.109.2"
PORT = 5000


@app.route('/index', methods=['GET', 'POST'])
def index():  # the first page: file upload
    """
    Returns the upload page rendered from an inline template.
    :return:
    """
    html = Template("""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>关键词提取接口(智能客服)</title>
<style>
body{width:1450px; height:500px; text-align:center; line-height:50px; border:blue solid 1px;}
</style>
</head>
<body>
<div><strong>key_word(1.0)</strong></div>
<form action = "http://$HOST:$PORT/upload" method = "POST"
enctype = "multipart/form-data">
<input type = "file" name = "file" />
<input type = "submit"/>
</form>
<p>温馨提示:文件名最好用当日时间命名比如:20211207.txt,避免文件命名重复,且不可出现中文命名!</p>
<p>温馨提示:该接口每小时可以处理35万条文本数据,请自行估计时间</p>
<p>温馨提示:生成关键词文件:关键词_20211207.txt</p>
<p><a href="http://172.25.109.2:5000/index4_5">文件查看+文件进行关键词提取</a></p>
<p><a href="http://172.25.109.2:5000/index1">文件查看+文件下载</a></p>
<p><a href="http://172.25.109.2:5000/index3">文件查看+文件删除</a></p>
</body>
</html>
""")
    html = html.substitute({"HOST": HOST, "PORT": PORT})
    return html

# the starting page: file listing for downloads
@app.route('/index1')  # , methods=['GET', 'POST']
def index1():
    resp = os.walk("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file")
    data_list = []
    for root, dirs, files in resp:
        data_list.append(files)
    if len(data_list) > 0:
        data_list = data_list[0]
    print(data_list)
    return render_template("index1.html", data=data_list, herf=" ")


# show the download link, then jump back after the download
@app.route('/index2', methods=['POST'])  # , methods=['GET', 'POST']
def index2():
    resp = os.walk("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file")
    data_list = []
    for root, dirs, files in resp:
        data_list.append(files)
    if len(data_list) > 0:
        data_list = data_list[0]

    data = request.form['Name']
    print(data)
    if data not in data_list:
        resp = os.walk("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file")
        data_list1 = []
        for root, dirs, files in resp:
            data_list1.append(files)
        if len(data_list1) > 0:
            data1 = data_list1[0]
        else:
            data1 = []
        attachment_url = ""
        tishi = "您输入的文件名有误!"
        return render_template("index2.html", data=data1, tishi=tishi, herf=attachment_url)
    elif data in data_list:
        attachment_url = 'http://172.25.109.2:5000/download?fileId=' + str(data)
        data1 = ""
        tishi = "请用浏览器访问这个链接下载!"
        return render_template("index2.html", data=data1, tishi=tishi, herf=attachment_url)

# show the delete page
@app.route('/index3')  # , methods=['GET', 'POST']
def index3():
    resp = os.walk("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file")
    data_list = []
    for root, dirs, files in resp:
        data_list.append(files)
    if len(data_list) > 0:
        data_list = data_list[0]
    return render_template("index3.html", data=data_list, herf=" ")


# handle the delete and return to the file list
@app.route('/index4', methods=['GET', 'POST'])
def index4():
    resp = os.walk("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file")
    data_list = []
    for root, dirs, files in resp:
        data_list.append(files)
    if len(data_list) > 0:
        data_list = data_list[0]
    data_name = request.form['Name']
    print(data_name)
    if data_name not in data_list:
        resp1 = os.walk("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file")
        data_list1 = []
        for root, dirs, files in resp1:
            data_list1.append(files)
        if len(data_list1) > 0:
            data1 = data_list1[0]
        else:
            data1 = []
        tishi = "您输入的文件名有误!"
        attachment_url = ""
        return render_template("index3.html", data=data1, tishi=tishi, herf=attachment_url)
    elif data_name in data_list:
        attachment_url = ""
        tishi = " "
        os.remove("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file/" + str(data_name))
        resp1 = os.walk("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file")
        data_list2 = []
        for root, dirs, files in resp1:
            data_list2.append(files)
        if len(data_list2) > 0:
            data1 = data_list2[0]
        return render_template("index3.html", data=data1, tishi=tishi, herf=attachment_url)


# show the extraction page
@app.route('/index4_5')  # , methods=['GET', 'POST']
def index4_5():
    resp = os.walk("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file")
    data_list = []
    for root, dirs, files in resp:
        data_list.append(files)
    if len(data_list) > 0:
        data_list = data_list[0]
    return render_template("index4_5.html", data=data_list, herf=" ")

# run keyword extraction on a chosen file
@app.route('/index5', methods=['GET', 'POST'])
def index5():
    resp = os.walk("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file")
    data_list = []
    for root, dirs, files in resp:
        data_list.append(files)
    print(data_list)
    if len(data_list) > 0:
        data_list = data_list[0]
    else:
        data_list = []
    data_name = request.form['Name']
    print(data_name)
    if data_name not in data_list:
        resp1 = os.walk("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file")
        data_list1 = []
        for root, dirs, files in resp1:
            data_list1.append(files)
        if len(data_list1) > 0:
            data1 = data_list1[0]
        else:
            data1 = []
        tishi = "您输入的文件名有误! "
        attachment_url = ""
        return render_template("index5.html", data=data1, tishi=tishi, herf=attachment_url)
    elif data_name in data_list:
        txt_path = '/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file/'

        # an earlier polling variant, kept here for reference:
        # while 1:
        #     a = 0
        #     if a == 0:
        #         key_word_sum = action_keyword(txt_path)
        #         a = a + 1
        #
        #     time.sleep(2)
        #     attachment_url = ""
        #     tishi = "文件正在提取中...,请勿重复提交!"
        #     data1 = "每次只对一个文件进行提取"
        #     rep = os.walk("/Users/didi/Documents/三种方法词表挖掘线上项目/flask 文件下载上传/save_file")
        #     name_file = []
        #     for root, dirs, files in rep:
        #         name_file.append(files)
        #     name_files = name_file[0]
        #     if "关键词_" + str(data_name) in name_files:
        #         break
        # return render_template("index5.html", data=data1, tishi=tishi, herf=attachment_url)

        key_word_sum = action_keyword(txt_path, data_name)
        return '<a href="http://172.25.109.2:5000/index">程序执行结束,点击返回首页</a>'
        # attachment_url = ""
        # tishi = "文件提取完成"
        # data1 = "提取到关键词" + str(key_word_sum) + "个"
        # return render_template("index5.html", data=data1, tishi=tishi, herf=attachment_url)

def allowed_file(filename):
    """
    Check whether the file-name extension is allowed.
    :param filename:
    :return:
    """
    return '.' in filename and \
           filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/upload', methods=['GET', 'POST'])
def upload_file():
    """
    Upload a file into the save_file folder.
    Example with requests:
        with open('path', 'rb') as file_obj:
            rsp = requests.post('http://localhost:5000/upload', files={'file': file_obj})
            print(rsp.text)  # --> file uploaded successfully
    """
    if 'file' not in request.files:
        return "No file part"
    file = request.files['file']
    if file.filename == '':
        return 'No selected file'
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        print(app.config['UPLOAD_FOLDER'])
        print(filename)
        file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
        return '<a href="http://172.25.109.2:5000/index">返回提交首页</a>'
    return "file uploaded Fail"


@app.route("/download")
def download_file():
"""
下载save_file目录下面的文件
eg:下载当前目录下面的123.tar 文件,eg:http://localhost:5000/download?fileId=123.tar
:return:
"""
file_name = request.args.get('fileId')
file_path = os.path.join(pwd, 'save_file', file_name)
if os.path.isfile(file_path):
return send_file(file_path, as_attachment=True)
else:
return "The downloaded file does not exist"

# if __name__ == '__main__':
#     app.run(host=HOST, port=PORT)

from gevent import pywsgi

if __name__ == '__main__':
    server = pywsgi.WSGIServer(('0.0.0.0', 5000), app)
    server.serve_forever()

# http://172.25.109.2:5000/index
# http://172.25.109.2:5000/index1
# url: http://172.25.109.2:5000/download?fileId=test.txt

# the following way of starting the server also works:
# if __name__ == "__main__":
#     print("端口号={}".format(FLAGS.port))
#     app.run(host="10.178.27.59", port=FLAGS.port, debug=True)

index1.html:

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>文件显示</title>
<style>
body{width:1450px; height:500px; text-align:center; line-height:50px; border:blue solid 1px;}
</style>
</head>
<body>
<div>文件显示</div>
<p>{{data}}</p>
<p>{{herf}}</p>
<form action = "http://172.25.109.2:5000/index2" method = "POST">
<p>请输入您要下载的文件名<input type = "text" name = "Name" /></p>
<p><input type = "submit" value = "提交" /></p>
</form>
<a href="http://172.25.109.2:5000/index">返回提交首页</a>
</body>
</html>

index2.html:

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>文件显示</title>
<style>
body{width:1450px; height:500px; text-align:center; line-height:50px; border:blue solid 1px;}
</style>
</head>
<body>
<div>文件显示</div>
<p>{{data}}</p>
<p>{{tishi}}</p>
<p>{{herf}}</p>
<form action = "http://172.25.109.2:5000/index2" method = "POST">
<p>请输入您要下载的文件名<input type = "text" name = "Name" /></p>
<p><input type = "submit" value = "提交" /></p>
</form>
<a href="http://172.25.109.2:5000/index">返回提交首页</a>
</body>
</html>

index3.html:

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>文件显示</title>
<style>
body{width:1450px; height:500px; text-align:center; line-height:50px; border:blue solid 1px;}
</style>
</head>
<body>
<div>文件显示</div>

<p>{{data}}</p>
<p>{{tishi}}</p>
<p>{{herf}}</p>

<form action = "http://172.25.109.2:5000/index4" method = "POST">
<p>请输入您要删除的文件名<input type = "text" name = "Name" /></p>
<p><input type = "submit" value = "提交" /></p>
</form>
<a href="http://172.25.109.2:5000/index">返回提交首页</a>
</body>
</html>

index4.html:

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>文件显示</title>
<style>
body{width:1450px; height:500px; text-align:center; line-height:50px; border:blue solid 1px;}
</style>
</head>
<body>
<div>文件显示</div>
<p>{{data}}</p>
<p>{{herf}}</p>

<form action = "http://172.25.109.2:5000/index2" method = "POST">
<p>请输入您要删除的文件名<input type = "text" name = "Name" /></p>
<p><input type = "submit" value = "提交" /></p>
</form>
<a href="http://172.25.109.2:5000/index">返回提交首页</a>
</body>
</html>

index4_5.html:

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>文件显示</title>
<style>
body{width:1450px; height:500px; text-align:center; line-height:50px; border:blue solid 1px;}
</style>
</head>
<body>
<div>文件显示</div>
<p>{{data}}</p>
<p>{{tishi}}</p>
<p>{{herf}}</p>
<form action = "http://172.25.109.2:5000/index5" method = "POST">
<p>请输入您要提取的文件名<input type = "text" name = "Name" /></p>
<p><input type = "submit" value = "提交" /></p>
</form>
<p>每次只能提交一个文件,提交之后请在页面静候等待</p>
<a href="http://172.25.109.2:5000/index">返回提交首页</a>
</body>
</html>

index5.html:

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>文件显示</title>
<style>
body{width:1450px; height:500px; text-align:center; line-height:50px; border:blue solid 1px;}
</style>
</head>
<body>
<div>文件显示</div>
<p>{{data}}</p>
<p>{{tishi}}</p>
<p>{{herf}}</p>
<form action = "http://172.25.109.2:5000/index5" method = "POST">
<p>请输入您要提取的文件名<input type = "text" name = "Name" /></p>
<p><input type = "submit" value = "提交" /></p>
</form>
<p>每次只能提交一个文件,提交之后请在页面静候等待</p>
<a href="http://172.25.109.2:5000/index">返回提交首页</a>
</body>
</html>

Finally, here is a screenshot of the finished site:

(screenshot: the new-word-discovery site built on Flask, backend view)