2024-09-27 14:24:18 +02:00
|
|
|
import warnings
|
|
|
|
from datetime import datetime
|
2024-09-27 17:11:21 +02:00
|
|
|
import json
|
|
|
|
import os
|
2024-09-27 14:24:18 +02:00
|
|
|
|
2024-09-27 14:53:16 +02:00
|
|
|
from src.scrub import scrub_web
|
|
|
|
from src.key import create_queries
|
2024-09-27 18:18:21 +02:00
|
|
|
from src.evaluate import sort_results, CLS_POOLING, MEAN_POOLING, MAX_POOLING
|
2024-09-27 14:24:18 +02:00
|
|
|
|
|
|
|
# Silence FutureWarning only; every other warning category is still reported.
warnings.simplefilter("ignore", FutureWarning)
|
|
|
|
|
|
|
|
|
2024-09-27 18:18:21 +02:00
|
|
|
def hin_fetch(subject, weights, pooling):
    """Collect, rank and report web results for ``subject``.

    Raw search results are cached as JSON under ``web_data/``; when a cache
    file for the subject already exists it is loaded instead of querying the
    web again. The ranked top 10 is printed, and a Markdown log of the whole
    run is written under ``logs/``.

    Args:
        subject: Text describing the topic to search for. It is also used to
            derive the cache file name.
        weights: Forwarded unchanged to ``sort_results``.
        pooling: Forwarded unchanged to ``sort_results``.

    Returns:
        None. The outcome is printed and written to the log file.
    """
    # Local import: the file's import header does not provide hashlib.
    import hashlib

    current_time = datetime.now().strftime("%m-%d_%H-%M")

    # The builtin hash() is salted per interpreter process for str values, so
    # it cannot name a cache file that must be found again on a later run.
    # A SHA-256 digest of the subject is stable across runs and machines.
    subject_digest = hashlib.sha256(subject.encode("utf-8")).hexdigest()
    data_path = f"web_data/{subject_digest}.json"
    file_path = f"logs/run_{current_time}.md"

    # Make sure both output directories exist before anything is written.
    for out_path in (data_path, file_path):
        os.makedirs(os.path.dirname(out_path), exist_ok=True)

    log_content = f"# Hin run, {current_time}\n\nSubject : {subject}\n\n"

    if os.path.exists(data_path):
        # Cache hit: reuse the stored results.
        log_content += f"## Query results from {data_path}*\n\n"
        print(f"* Subject known from {data_path}")

        with open(data_path, 'r', encoding='utf-8') as f:
            results = json.load(f)
    else:
        # Cache miss: build the queries, scrape the web, then store the
        # results for later runs.
        queries, keyword_log = create_queries(subject)
        log_content += keyword_log

        results, scrub_log = scrub_web(queries)
        log_content += scrub_log

        with open(data_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=4)

        log_content += f"*Stored results in {data_path}*\n\n"
        print(f"\n* Stored results in {data_path}")

    sorted_results, results_log = sort_results(subject, results, weights, pooling)
    log_content += results_log

    print("### Done ###\n")

    # Report the top 10 results, each entry followed by a dashed separator.
    separator = "-" * 40
    report = "## Results\n" + "".join(
        f"\nRank {idx} ({result['score']:.4f}):\nTitle: {result['title']}\nURL: {result['url']}\nSnippet: {result['snippet']}\n"
        + separator
        for idx, result in enumerate(sorted_results[:10], 1)
    )

    print(report + "\n")

    # Written as UTF-8 explicitly, matching the JSON cache, so non-ASCII
    # scraped text does not depend on the platform's default encoding.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(log_content + report)
|
|
|
|
|
|
|
|
# Subject of the run. Swap in the input() line below to ask for it interactively.
#subject = input("Enter subject : ")
subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"

# Argument layout:
#   weights = [title_weight, snippet_weight]
#   pooling = [title_pooling, snippet_pooling]
hin_fetch(subject, weights=[1, 1], pooling=[CLS_POOLING, MAX_POOLING])
|