hin/main.py

62 lines
2.1 KiB
Python
Raw Normal View History

2024-09-27 14:24:18 +02:00
import warnings
from datetime import datetime
2024-09-27 17:11:21 +02:00
import json
import os
2024-09-27 14:24:18 +02:00
2024-09-27 14:53:16 +02:00
from src.scrub import scrub_web
from src.key import create_queries
2024-09-27 18:18:21 +02:00
from src.evaluate import sort_results, CLS_POOLING, MEAN_POOLING, MAX_POOLING
2024-09-27 14:24:18 +02:00
# Suppress FutureWarning messages only; no other warning category is touched
# by this filter.
warnings.simplefilter(action='ignore', category=FutureWarning)
2024-09-27 18:18:21 +02:00
def hin_fetch(subject, weights, pooling):
    """Fetch, rank and report web results for ``subject``.

    Raw query results are cached as JSON under ``web_data/``. On a cache hit
    the stored results are loaded; otherwise queries are built with
    ``create_queries``, executed with ``scrub_web`` and the results are
    stored. The results are then ranked with ``sort_results``, the ten best
    are printed, and a Markdown log of the whole run is written under
    ``logs/``.

    Args:
        subject: Text describing the topic to search for.
        weights: Passed unchanged to ``sort_results`` (presumably a
            ``[title_weight, snippet_weight]`` pair -- confirm against
            ``src.evaluate``).
        pooling: Passed unchanged to ``sort_results`` (presumably a
            ``[title_pooling, snippet_pooling]`` pair -- confirm against
            ``src.evaluate``).

    Returns:
        None. The report is printed and the log is written to disk.
    """
    # Imported locally so this function does not depend on edits to the
    # module's import section.
    import hashlib

    current_time = datetime.now().strftime("%m-%d_%H-%M")

    # The built-in hash() of a str is salted per interpreter process, so it
    # yields a different file name on every run and the cache would never be
    # hit. A cryptographic digest is stable across runs.
    subject_key = hashlib.sha256(subject.encode("utf-8")).hexdigest()
    data_path = f"web_data/{subject_key}.json"
    file_path = f"logs/run_{current_time}.md"
    log_content = f"# Hin run, {current_time}\n\nSubject : {subject}\n\n"

    if os.path.exists(data_path):
        log_content += f"## Query results from {data_path}*\n\n"
        print(f"* Subject known from {data_path}")
        with open(data_path, 'r', encoding='utf-8') as f:
            results = json.load(f)
    else:
        queries, keyword_log = create_queries(subject)
        log_content += keyword_log

        results, scrub_log = scrub_web(queries)
        log_content += scrub_log

        # Make sure the cache directory exists before the first write.
        os.makedirs(os.path.dirname(data_path), exist_ok=True)
        with open(data_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=4)
        log_content += f"*Stored results in {data_path}*\n\n"
        print(f"\n* Stored results in {data_path}")

    sorted_results, results_log = sort_results(subject, results, weights, pooling)
    log_content += results_log
    print("### Done ###\n")

    # Build the report for the top 10 results in one pass.
    separator = "-" * 40
    report = "## Results\n" + "".join(
        f"\nRank {idx} ({result['score']:.4f}):\n"
        f"Title: {result['title']}\n"
        f"URL: {result['url']}\n"
        f"Snippet: {result['snippet']}\n"
        f"{separator}"
        for idx, result in enumerate(sorted_results[:10], 1)
    )
    print(report + "\n")

    # Create and save the log file. Titles and snippets may contain non-ASCII
    # text, so the encoding is fixed instead of relying on the platform
    # default.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(log_content + report)
# Run the demo query only when this file is executed as a script, so that
# importing the module does not trigger a fetch-and-rank run as a side effect.
if __name__ == "__main__":
    # For interactive use, replace the fixed subject below with:
    #   subject = input("Enter subject : ")
    subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"
    # hin_fetch(subject, [title_weight, snippet_weight], [title_pooling, snippet_pooling])
    hin_fetch(subject, [1, 1], [CLS_POOLING, MAX_POOLING])