Compare commits
10 commits: 5a815643a7 ... a266739b51

| SHA1 |
|---|
| a266739b51 |
| cb36ef8bd2 |
| 1bb5922b98 |
| ebfc48fdcb |
| 3489ade151 |
| b3f0bea0e5 |
| ef5c154a1e |
| 5a51a383ed |
| 7f4bc61fa8 |
| 2539b919c2 |
3  .gitignore  vendored

@@ -1,2 +1,3 @@
/target
/models
/logs/*
/web_data/*
5  Pipfile

@@ -9,6 +9,11 @@ torch = "*"
requests = "*"
keybert = "*"
progressbar = "*"
rake-nltk = "*"
nltk = "*"
spacy = "*"
numpy = "*"
gensim = "*"

[dev-packages]
3  README.md  Normal file

@@ -0,0 +1,3 @@
# Hin

A searxng/BERT mix to find science papers more efficiently, given a subject
36  evaluate.py

@@ -1,36 +0,0 @@
import warnings
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

# Suppress FutureWarnings and other warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_uncased')
model = AutoModel.from_pretrained('allenai/scibert_scivocab_uncased')

# Function to compute sentence embeddings by pooling token embeddings (CLS token)
def get_sentence_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)

    # Pooling strategy: Use the hidden state of the [CLS] token as the sentence embedding
    cls_embedding = outputs.last_hidden_state[:, 0, :]  # Shape: (batch_size, hidden_size)
    return cls_embedding

# Example subject and abstract
subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"
abstract = """
The research work presented in this paper aims to optimise the dynamic response of a carbon-epoxy plate by including into the laminate one frequency-dependent interleaved viscoelastic layer. To keep an acceptable bending stiffness, some holes are created in the viscoelastic layer, thus facilitating the resin through layer pene- tration during the co-curing manufacturing process. Plates including (or not) one perforated (or non-perforated) viscoelastic layer are manufactured and investigated experimentally and numerically. First, static and dynamic tests are performed on sandwich coupons to characterise the stiffness and damping properties of the plates in a given frequency range. Resulting mechanical properties are then used to set-up a finite element model and simulate the plate dynamic response. In parallel, fre- quency response measurements are carried out on the manufactured plates, then successfully confronted to the numerical results. Finally, a design of experiments is built based on a limited number on numerical simulations to find the configuration of bridges that maximises the damping while keeping a stiffness higher than half the stiffness of the equivalent undamped plate."""

# Get embeddings
subject_embedding = get_sentence_embedding(subject)
abstract_embedding = get_sentence_embedding(abstract)

# 2. **Measure Semantic Similarity Using Cosine Similarity**

# Compute cosine similarity between subject and abstract embeddings
similarity = F.cosine_similarity(subject_embedding, abstract_embedding)
print(f"Cosine Similarity: {similarity.item():.4f}")
25  key.py

@@ -1,25 +0,0 @@
from keybert import KeyBERT
from transformers import AutoTokenizer, AutoModel

# Load the SciBERT model and tokenizer
tokenizer = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_uncased')

print("* Tokenizer")

model = AutoModel.from_pretrained('allenai/scibert_scivocab_uncased')

print("* Scibert model")

# Define a KeyBERT model using SciBERT embeddings
kw_model = KeyBERT(model=model)

print("* Keybert model")

# Define the subject from which to extract keywords
subject = "tig welding of inconel 625 and influences on micro structures"

# Extract keywords from the subject
keywords = kw_model.extract_keywords(subject, keyphrase_ngram_range=(1, 2), stop_words='english', use_maxsum=True)

# Print extracted keywords
for keyword, score in keywords:
    print(f"Keyword: {keyword}, Score: {score:.4f}")
67  main.py  Normal file

@@ -0,0 +1,67 @@
import warnings
from datetime import datetime
import json
import os

from src.scrub import scrub_web
from src.key import create_queries
from src.evaluate import sort_results, CLS_POOLING, MEAN_POOLING, MAX_POOLING

# Suppress FutureWarnings and other warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


def hin_fetch(subject, weights, pooling):
    current_time = datetime.now().strftime("%m-%d_%H-%M")
    data_path = f"web_data/{hash(subject)}.json"
    file_path = f"logs/run_{current_time}_{weights}{pooling}.md"
    log_content = f"# Hin run, {current_time}\n\nSubject : {subject}\n\n"

    results = []

    if os.path.exists(data_path) :
        log_content += f"## Query results from {data_path}*\n\n"
        print(f"* Subject known from {data_path}")

        with open(data_path, 'r', encoding='utf-8') as f:
            results = json.load(f)
    else :
        queries, keyword_log = create_queries(subject)
        log_content += keyword_log

        results, scrub_log = scrub_web(queries)
        log_content += scrub_log

        with open(data_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=4)

        log_content += f"*Stored results in {data_path}*\n\n"
        print(f"\n* Stored results in {data_path}")

    sorted_results, results_log = sort_results(subject, results, weights, pooling)
    log_content += results_log

    print("### Done ###\n")

    report = "## Results\n"
    # Print the top 10 results
    for idx, result in enumerate(sorted_results[:10], 1):
        report += f"\nRank {idx} ({result['score']:.4f}):\nTitle: {result['title']}\nURL: {result['url']}\nSnippet: {result['snippet']}\n" + "-" * 40

    print(report + "\n")

    # Create and save the file
    with open(file_path, 'w') as file:
        file.write(log_content + report)


#subject = input("Enter subject : ")
subject = "State of the art on the identification of wood structure natural frequencies. Influence of the mechanical properties and interest in sensitivity analysis as prospects for reverse identification method of wood elastic properties."
#subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"

# hin_fetch(subject, [title_weight, snippet_weight], [title_pooling, snippet_pooling])
hin_fetch(subject, [1,0], [CLS_POOLING, MAX_POOLING])
hin_fetch(subject, [1,0], [MEAN_POOLING,MAX_POOLING])
hin_fetch(subject, [1,0], [MAX_POOLING, MAX_POOLING])
hin_fetch(subject, [0,1], [CLS_POOLING, CLS_POOLING])
hin_fetch(subject, [0,1], [CLS_POOLING, MEAN_POOLING])
hin_fetch(subject, [0,1], [CLS_POOLING, MAX_POOLING])
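One caveat on the caching in hin_fetch above: the cache key web_data/{hash(subject)}.json uses Python's built-in hash(), which is salted per interpreter process for strings, so the same subject maps to a different file each time the interpreter restarts (it stays stable within a single run, which is enough for the six hin_fetch calls above to share one fetch). A minimal sketch of a run-stable alternative using hashlib; the helper name is illustrative and not part of the diff:

import hashlib

def stable_cache_path(subject):
    # sha256 of the subject text is identical across interpreter runs,
    # unlike the built-in hash(), which is randomized per process for str.
    digest = hashlib.sha256(subject.encode("utf-8")).hexdigest()[:16]
    return f"web_data/{digest}.json"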
@@ -1,131 +0,0 @@
import warnings
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
import requests
import progressbar


# Me
#subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"
#query = "composite viscoelastic damping"

# Anne
#subject = "State of the art on the identification of wood structure natural frequencies. Influence of the mechanical properties and interest in sensitivity analysis as prospects for reverse identification method of wood elastic properties."
#query = "wood frequency analysis mechanical properties"

# Axel
#subject = "Characterization of SiC MOSFET using double pulse test method."
#query = "SiC MOSFET double pulse test"

# Paul
#subject = "Thermo-Mechanical Impact of temperature oscillations on bonding and metallization for SiC MOSFETs soldered on ceramic substrate"
#query = "thermo mechanical model discrete bonding SiC MOSFET"

# Jam
subject = "tig welding of inconel 625 and influences on micro structures"
query = "tig welding inconel 625"

widgets = [' [',
           progressbar.Timer(format= 'elapsed time: %(elapsed)s'),
           '] ',
           progressbar.Bar('*'),' (',
           progressbar.ETA(), ') ',
          ]

# Suppress FutureWarnings and other warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

print("\n### Fetching Data ###\n")

# Load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_uncased')

print("* Got tokenizer")

model = AutoModel.from_pretrained('allenai/scibert_scivocab_uncased')

print("* Got model")

# Function to compute sentence embeddings by pooling token embeddings (CLS token)
def get_sentence_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)

    # Pooling strategy: Use the hidden state of the [CLS] token as the sentence embedding
    cls_embedding = outputs.last_hidden_state[:, 0, :]  # Shape: (batch_size, hidden_size)
    return cls_embedding

# Function to compute cosine similarity
def compute_similarity(embedding1, embedding2):
    similarity = F.cosine_similarity(embedding1, embedding2)
    return similarity.item()

# Define the SearxNG instance URL and search query
searxng_url = "https://search.penwing.org/search"  # Replace with your instance URL
params = {
    "q": query,  # Your search query
    "format": "json",  # Requesting JSON format
    "categories": "science",  # You can specify categories (optional)
}

# Send the request to SearxNG API
response = requests.get(searxng_url, params=params)

# Check if the request was successful
if response.status_code == 200:
    print("* Got search results")
    # Parse the JSON response
    data = response.json()

    subject_embedding = get_sentence_embedding(subject)

    print("* Tokenized subject")

    print("\n### Starting result processing ###\n")
    # List to store results with similarity scores
    scored_results = []

    results = data.get("results", [])
    progress = 0

    bar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), progressbar.Bar()],
                                  maxval=len(results)).start()

    # Process each result
    for result in results :
        title = result['title']
        url = result['url']
        snippet = result['content']

        # Get embedding for the snippet (abstract)
        snippet_embedding = get_sentence_embedding(snippet)

        # Compute similarity between subject and snippet
        similarity = compute_similarity(subject_embedding, snippet_embedding)

        # Store the result with its similarity score
        scored_results.append({
            'title': title,
            'url': url,
            'snippet': snippet,
            'similarity': similarity
        })

        progress += 1
        bar.update(progress)

    # Sort the results by similarity (highest first)
    top_results = sorted(scored_results, key=lambda x: x['similarity'], reverse=True)[:10]

    print("\n### Done ###\n")
    # Print the top 10 results
    for idx, result in enumerate(top_results, 1):
        print(f"Rank {idx} ({result['similarity']:.4f}):")
        print(f"Title: {result['title']}")
        print(f"URL: {result['url']}")
        print(f"Snippet: {result['snippet']}")
        print("-" * 40)
else:
    print(f"Error: {response.status_code}")
25  scrub.py

@@ -1,25 +0,0 @@
import requests

# Define the SearxNG instance URL and search query
searxng_url = "https://search.penwing.org/search"  # Replace with your instance URL
params = {
    "q": "zig zag theories",  # Your search query
    "format": "json",  # Requesting JSON format
    "categories": "science",  # You can specify categories (optional)
}

# Send the request to SearxNG API
response = requests.get(searxng_url, params=params)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()
    # Print or process the results
    for result in data.get("results", []):
        print(f"Title: {result['title']}")
        print(f"URL: {result['url']}")
        print(f"Snippet: {result['content']}")
        print("-" * 40)
else:
    print(f"Error: {response.status_code}")
@@ -6,9 +6,12 @@ mkShell {
    pipenv
    python3
    stdenv.cc.cc.lib
    zlib
  ];

  shellHook = ''
    export LD_LIBRARY_PATH="${pkgs.stdenv.cc.cc.lib}/lib";
    export LD_LIBRARY_PATH="${pkgs.zlib}/lib:$LD_LIBRARY_PATH";
    alias run="pipenv run python main.py; notify-send -u normal -a 'Hin' 'finished'"
  '';
}
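The LD_LIBRARY_PATH exports in this shellHook are presumably there because the pipenv-installed wheels (torch, numpy and friends) are dynamically linked against libstdc++ and zlib, which a Nix shell does not put on the default library path; pointing the loader at pkgs.stdenv.cc.cc.lib and pkgs.zlib lets those wheels import. This is an inference from the packages listed, not something stated in the diff.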
169  src/evaluate.py  Normal file

@@ -0,0 +1,169 @@
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
import progressbar
import math

CLS_POOLING = 1
MEAN_POOLING = 2
MAX_POOLING = 3

print("\n### Fetching SciBert ###\n")

# Load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_uncased')

print("* Got tokenizer")

model = AutoModel.from_pretrained('allenai/scibert_scivocab_uncased')

print("* Got model")

def get_subject_output(subject):
    subject_inputs = tokenizer(subject, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        subject_outputs = model(**subject_inputs)

    return subject_outputs

# Function to compute the embedding with a selected pooling method
def compute_similarity(subject_outputs, compare_text, pooling_method):
    # Tokenize the input texts
    compare_inputs = tokenizer(compare_text, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Compute embeddings for both the subject and the comparison text
    with torch.no_grad():
        compare_outputs = model(**compare_inputs)

    # Pooling strategies
    def cls_pooling(output):
        return output.last_hidden_state[:, 0, :]  # CLS token is at index 0

    def mean_pooling(output):
        return output.last_hidden_state.mean(dim=1)  # Mean of all token embeddings

    def max_pooling(output):
        return output.last_hidden_state.max(dim=1).values  # Max of all token embeddings

    # Choose pooling strategy based on the input integer
    if pooling_method == CLS_POOLING:
        subject_embedding = cls_pooling(subject_outputs)
        compare_embedding = cls_pooling(compare_outputs)
    elif pooling_method == MEAN_POOLING:
        subject_embedding = mean_pooling(subject_outputs)
        compare_embedding = mean_pooling(compare_outputs)
    elif pooling_method == MAX_POOLING:
        subject_embedding = max_pooling(subject_outputs)
        compare_embedding = max_pooling(compare_outputs)
    else:
        raise ValueError("Pooling method must be 1 (CLS), 2 (Mean), or 3 (Max).")

    return F.cosine_similarity(subject_embedding, compare_embedding).item()


def score_results(subject, results, weights, pooling):

    subject_model_output = get_subject_output(subject)
    print("* Tokenized subject\n")

    scored_results_urls = []
    scored_results = []

    print("* Started scoring results\n")

    bar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), progressbar.Bar()],
                                  maxval=len(results)).start()

    progress = 0

    title_score_bounds = [1, 0]
    snippet_score_bounds = [1, 0]

    title_pooling = pooling[0]
    snippet_pooling = pooling[1]

    log = f"Weights : {weights};\n\nPooling : {pooling}\n\n"

    # Process each result
    for result in results :
        progress += 1
        bar.update(progress)

        title = result['title']
        url = result['url']
        snippet = result['content']

        if title == subject :
            found_original = True

        if url in scored_results_urls :
            continue

        scored_results_urls.append(url)

        # Compute similarity between subject and result

        title_score, snippet_score = 1, 1

        if weights[0] != 0 :
            title_score = compute_similarity(subject_model_output, title, title_pooling)
        if weights[1] != 0 :
            snippet_score = compute_similarity(subject_model_output, snippet, snippet_pooling)

        if title_score < title_score_bounds[0] :
            title_score_bounds[0] = title_score
        if title_score > title_score_bounds[1] :
            title_score_bounds[1] = title_score
        if snippet_score < snippet_score_bounds[0] :
            snippet_score_bounds[0] = snippet_score
        if snippet_score > snippet_score_bounds[1] :
            snippet_score_bounds[1] = snippet_score

        # Store the result with its similarity score
        scored_results.append({
            'title': title,
            'url': url,
            'snippet': snippet,
            'title-score': title_score,
            'snippet-score': snippet_score
        })

    log += f"Score bounds : T{title_score_bounds} # S{snippet_score_bounds}\n\n"
    print("\n\n* Scored results\n")

    normalized_results = []
    for result in scored_results:
        title_score, snippet_score = 1, 1

        if weights[0] != 0 :
            title_score = (result['title-score'] - title_score_bounds[0]) / (title_score_bounds[1] - title_score_bounds[0])
        if weights[1] != 0 :
            snippet_score = (result['snippet-score'] - snippet_score_bounds[0]) / (snippet_score_bounds[1] - snippet_score_bounds[0])

        score = math.pow(math.pow(title_score, weights[0]) * math.pow(snippet_score, weights[1]), 1 / (weights[0] + weights[1]))

        normalized_results.append({
            'title': result['title'],
            'url': result['url'],
            'snippet': result['snippet'],
            'score': score,
        })

    return normalized_results, log


def sort_results(subject, results, weights, pooling):

    print("\n### Starting result processing (",len(results),") ###\n")

    log = "\n---\n\n## Scoring\n\n"

    scored_results, score_log = score_results(subject, results, weights, pooling)

    log += score_log

    # Sort the results by similarity (highest first)
    sorted_results = sorted(scored_results, key=lambda x: x['score'], reverse=True)

    return sorted_results, log
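The ranking in score_results min-max normalises the raw cosine similarities over the whole result set, then combines the title and snippet components with a weighted geometric mean: score = (title_score^w_t * snippet_score^w_s)^(1/(w_t + w_s)). A self-contained sketch of just that combination step, with illustrative numbers; the function name is not from the diff:

import math

def combine_scores(title_score, snippet_score, weights):
    # Weighted geometric mean, as in score_results: a component with
    # weight 0 contributes a factor of 1 and has no effect on the score.
    w_title, w_snippet = weights
    return math.pow(
        math.pow(title_score, w_title) * math.pow(snippet_score, w_snippet),
        1 / (w_title + w_snippet),
    )

# With weights [1, 0] only the normalised title similarity matters:
print(combine_scores(0.8, 1.0, [1, 0]))   # 0.8
# With equal weights a weak snippet drags a strong title down:
print(combine_scores(0.9, 0.4, [1, 1]))   # about 0.6, sqrt(0.36)

One consequence of the min-max step is that the worst result on an active component is normalised to exactly 0, so any non-zero weight on that component sends its final score to 0 regardless of the other component.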
33  src/key.py  Normal file

@@ -0,0 +1,33 @@
from keybert import KeyBERT
from itertools import combinations

def create_queries(subject) :

    print("\n### Getting Keywords ###\n")

    kw_model = KeyBERT()

    print("* Got Keybert")

    keywords = kw_model.extract_keywords(subject, keyphrase_ngram_range=(1, 2), stop_words='english', use_mmr=True, diversity=0.7)

    print("* keywords extracted")

    sorted_keywords = sorted(keywords, key=lambda x: -x[1])
    text_keywords = [x[0] for x in sorted_keywords]

    log = f"## Keywords\n\n{text_keywords}\n\n"

    queries = []

    for r in range(1, len(text_keywords) + 1):
        comb = combinations(text_keywords, r)
        queries.extend(comb)

    final_queries = [subject] + ["\"" + "\" OR \"".join(query) + "\"" for query in queries]

    #final_queries.ins(subject)

    print("* query generated")

    return final_queries, log
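create_queries enumerates every non-empty combination of the extracted keywords and OR-joins each one into a quoted query, so n keywords yield 2^n - 1 queries in addition to the raw subject; since scrub_web issues one SearxNG request per query, the request count grows exponentially with the keyword count. A small sketch of the expansion on a toy keyword list (the keywords here are illustrative):

from itertools import combinations

text_keywords = ["tig welding", "inconel 625", "microstructure"]

queries = []
for r in range(1, len(text_keywords) + 1):
    queries.extend(combinations(text_keywords, r))

print(len(queries))   # 7, i.e. 2**3 - 1 non-empty combinations
print("\"" + "\" OR \"".join(queries[-1]) + "\"")
# "tig welding" OR "inconel 625" OR "microstructure"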
44  src/scrub.py  Normal file

@@ -0,0 +1,44 @@
import requests
import progressbar

searxng_url = "https://search.penwing.org/search"

def scrub_web(queries) :
    print("\n### Fetching Web data ###\n")

    web_bar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), progressbar.Bar()],
                                      maxval=len(queries)).start()

    progress = 0
    results = []
    log = "## Queries\n\n"

    for query in queries :
        params = {
            "q": query,  # Your search query
            "format": "json",  # Requesting JSON format
            "categories": "science",  # You can specify categories (optional)
        }

        response = requests.get(searxng_url, params=params)

        if response.status_code == 200:
            data = response.json()

            # List to store results with similarity scores
            scored_results = []

            results.extend(data.get("results", []))

            log += f"{query};\n"
        else:
            print(f"Error: {response.status_code}")

        progress += 1
        web_bar.update(progress)

    print("")

    return results, log
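The only fields scrub_web passes downstream are the ones src/evaluate.py reads later: 'title' and 'content' are embedded with SciBERT, and 'url' is used for de-duplication and reporting. A sketch of the per-result shape flowing between the two modules, with invented values for illustration:

# One entry of the list returned by scrub_web(queries); SearxNG typically
# returns more fields per hit, but only these three are consumed by
# score_results in src/evaluate.py.
example_result = {
    "title": "Damping of composite plates with an interleaved viscoelastic layer",
    "url": "https://example.org/paper/123",
    "content": "We study the damping behaviour of carbon-epoxy laminates ...",
}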
25  src/test.py  Normal file

@@ -0,0 +1,25 @@
import requests

subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"

searxng_url = "https://search.penwing.org/search"

params = {
    "q": subject,  # Your search query
    "format": "json",  # Requesting JSON format
    "categories": "science",  # You can specify categories (optional)
}

response = requests.get(searxng_url, params=params)

if response.status_code == 200:
    data = response.json()

    # List to store results with similarity scores
    scored_results = []

    for result in data.get("results", []):
        print(result['title'])
        print("---")
else:
    print(f"Error: {response.status_code}")
19  subjects  Normal file

@@ -0,0 +1,19 @@
# Me
subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"
query = "composite viscoelastic damping"

# Anne
subject = "State of the art on the identification of wood structure natural frequencies. Influence of the mechanical properties and interest in sensitivity analysis as prospects for reverse identification method of wood elastic properties."
query = "wood frequency analysis mechanical properties"

# Axel
subject = "Characterization of SiC MOSFET using double pulse test method."
query = "SiC MOSFET double pulse test"

# Paul
subject = "Thermo-Mechanical Impact of temperature oscillations on bonding and metallization for SiC MOSFETs soldered on ceramic substrate"
query = "thermo mechanical model discrete bonding SiC MOSFET"

# Jam
subject = "tig welding of inconel 625 and influences on micro structures"
query = "tig welding inconel 625"