groq api
This commit is contained in:
parent
a266739b51
commit
5ddeb26891
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
||||||
/models
|
/models
|
||||||
/logs/*
|
/logs/*
|
||||||
/web_data/*
|
/web_data/*
|
||||||
|
api.py
|
||||||
|
|
3
Pipfile
3
Pipfile
|
@ -14,6 +14,9 @@ nltk = "*"
|
||||||
spacy = "*"
|
spacy = "*"
|
||||||
numpy = "*"
|
numpy = "*"
|
||||||
gensim = "*"
|
gensim = "*"
|
||||||
|
scikit-learn = "*"
|
||||||
|
pandas = "*"
|
||||||
|
groq = "*"
|
||||||
|
|
||||||
[dev-packages]
|
[dev-packages]
|
||||||
|
|
||||||
|
|
24
main.py
24
main.py
|
@ -1,16 +1,11 @@
|
||||||
import warnings
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from src.scrub import scrub_web
|
from src.scrub import scrub_web
|
||||||
from src.key import create_queries
|
from src.key3 import create_queries
|
||||||
from src.evaluate import sort_results, CLS_POOLING, MEAN_POOLING, MAX_POOLING
|
from src.evaluate import sort_results, CLS_POOLING, MEAN_POOLING, MAX_POOLING
|
||||||
|
|
||||||
# Suppress FutureWarnings and other warnings
|
|
||||||
warnings.simplefilter(action='ignore', category=FutureWarning)
|
|
||||||
|
|
||||||
|
|
||||||
def hin_fetch(subject, weights, pooling):
|
def hin_fetch(subject, weights, pooling):
|
||||||
current_time = datetime.now().strftime("%m-%d_%H-%M")
|
current_time = datetime.now().strftime("%m-%d_%H-%M")
|
||||||
data_path = f"web_data/{hash(subject)}.json"
|
data_path = f"web_data/{hash(subject)}.json"
|
||||||
|
@ -55,13 +50,16 @@ def hin_fetch(subject, weights, pooling):
|
||||||
file.write(log_content + report)
|
file.write(log_content + report)
|
||||||
|
|
||||||
#subject = input("Enter subject : ")
|
#subject = input("Enter subject : ")
|
||||||
subject = "State of the art on the identification of wood structure natural frequencies. Influence of the mechanical properties and interest in sensitivity analysis as prospects for reverse identification method of wood elastic properties."
|
#subject = "State of the art on the identification of wood structure natural frequencies. Influence of the mechanical properties and interest in sensitivity analysis as prospects for reverse identification method of wood elastic properties."
|
||||||
#subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"
|
#subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"
|
||||||
|
#subject = "Dynamic response of carbon-epoxy laminates including a perforated viscoelastic film"
|
||||||
|
subject = "tig welding of inconel 625 and influences on micro structures"
|
||||||
|
#subject = "Thermo-Mechanical Impact of temperature oscillations on bonding and metallization for SiC MOSFETs soldered on ceramic substrate"
|
||||||
|
|
||||||
# hin_fetch(subject, [title_weight, snippet_weight], [title_pooling, snippet_pooling])
|
# hin_fetch(subject, [title_weight, snippet_weight], [title_pooling, snippet_pooling])
|
||||||
hin_fetch(subject, [1,0], [CLS_POOLING, MAX_POOLING])
|
hin_fetch(subject, [2,1], [CLS_POOLING, MAX_POOLING])
|
||||||
hin_fetch(subject, [1,0], [MEAN_POOLING,MAX_POOLING])
|
#hin_fetch(subject, [1,0], [MEAN_POOLING,MAX_POOLING])
|
||||||
hin_fetch(subject, [1,0], [MAX_POOLING, MAX_POOLING])
|
#hin_fetch(subject, [1,0], [MAX_POOLING, MAX_POOLING])
|
||||||
hin_fetch(subject, [0,1], [CLS_POOLING, CLS_POOLING])
|
#hin_fetch(subject, [0,1], [CLS_POOLING, CLS_POOLING])
|
||||||
hin_fetch(subject, [0,1], [CLS_POOLING, MEAN_POOLING])
|
#hin_fetch(subject, [0,1], [CLS_POOLING, MEAN_POOLING])
|
||||||
hin_fetch(subject, [0,1], [CLS_POOLING, MAX_POOLING])
|
#hin_fetch(subject, [0,1], [CLS_POOLING, MAX_POOLING])
|
||||||
|
|
|
@ -12,6 +12,6 @@ mkShell {
|
||||||
shellHook = ''
|
shellHook = ''
|
||||||
export LD_LIBRARY_PATH="${pkgs.stdenv.cc.cc.lib}/lib";
|
export LD_LIBRARY_PATH="${pkgs.stdenv.cc.cc.lib}/lib";
|
||||||
export LD_LIBRARY_PATH="${pkgs.zlib}/lib:$LD_LIBRARY_PATH";
|
export LD_LIBRARY_PATH="${pkgs.zlib}/lib:$LD_LIBRARY_PATH";
|
||||||
alias run="pipenv run python main.py; notify-send -u normal -a 'Hin' 'finished'"
|
alias hin="pipenv run python main.py; notify-send -u normal -a 'Hin' 'finished'"
|
||||||
'';
|
'';
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,10 @@ import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
import progressbar
|
import progressbar
|
||||||
import math
|
import math
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
# Suppress FutureWarnings and other warnings
|
||||||
|
warnings.simplefilter(action='ignore', category=FutureWarning)
|
||||||
|
|
||||||
CLS_POOLING = 1
|
CLS_POOLING = 1
|
||||||
MEAN_POOLING = 2
|
MEAN_POOLING = 2
|
||||||
|
@ -66,7 +70,7 @@ def score_results(subject, results, weights, pooling):
|
||||||
subject_model_output = get_subject_output(subject)
|
subject_model_output = get_subject_output(subject)
|
||||||
print("* Tokenized subject\n")
|
print("* Tokenized subject\n")
|
||||||
|
|
||||||
scored_results_urls = []
|
scored_results_titles = []
|
||||||
scored_results = []
|
scored_results = []
|
||||||
|
|
||||||
print("* Started scoring results\n")
|
print("* Started scoring results\n")
|
||||||
|
@ -89,18 +93,18 @@ def score_results(subject, results, weights, pooling):
|
||||||
for result in results :
|
for result in results :
|
||||||
progress += 1
|
progress += 1
|
||||||
bar.update(progress)
|
bar.update(progress)
|
||||||
|
|
||||||
|
if not ("content" in result) :
|
||||||
|
continue
|
||||||
|
|
||||||
title = result['title']
|
title = result['title']
|
||||||
url = result['url']
|
url = result['url']
|
||||||
snippet = result['content']
|
snippet = result['content']
|
||||||
|
|
||||||
if title == subject :
|
if title in scored_results_titles :
|
||||||
found_original = True
|
|
||||||
|
|
||||||
if url in scored_results_urls :
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
scored_results_urls.append(url)
|
scored_results_titles.append(title)
|
||||||
|
|
||||||
# Compute similarity between subject and result
|
# Compute similarity between subject and result
|
||||||
|
|
||||||
|
|
38
src/key2.py
Normal file
38
src/key2.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from itertools import combinations
|
||||||
|
|
||||||
|
vectorizer = TfidfVectorizer(stop_words='english')
|
||||||
|
|
||||||
|
def create_queries(subject) :
|
||||||
|
|
||||||
|
print("\n### Getting Keywords ###\n")
|
||||||
|
|
||||||
|
tfidf_matrix = vectorizer.fit_transform([subject])
|
||||||
|
|
||||||
|
feature_names = vectorizer.get_feature_names_out()
|
||||||
|
|
||||||
|
print("* Preparation done")
|
||||||
|
|
||||||
|
sorted_indices = tfidf_matrix[0].toarray()[0].argsort()[::-1]
|
||||||
|
|
||||||
|
text_keywords = []
|
||||||
|
|
||||||
|
for i in range(5): # Change 3 to however many keywords you want
|
||||||
|
if i < len(sorted_indices):
|
||||||
|
text_keywords.append(feature_names[sorted_indices[i]])
|
||||||
|
|
||||||
|
log = f"## Keywords\n\n{text_keywords}\n\n"
|
||||||
|
|
||||||
|
queries = []
|
||||||
|
|
||||||
|
for r in range(1, len(text_keywords) + 1):
|
||||||
|
comb = combinations(text_keywords, r)
|
||||||
|
queries.extend(comb)
|
||||||
|
|
||||||
|
final_queries = [subject] + ["\"" + "\" OR \"".join(query) + "\"" for query in queries]
|
||||||
|
|
||||||
|
#final_queries.ins(subject)
|
||||||
|
|
||||||
|
print("* query generated")
|
||||||
|
|
||||||
|
return final_queries, log
|
24
src/key3.py
Normal file
24
src/key3.py
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
from groq import Groq
|
||||||
|
from src.api import KEY
|
||||||
|
|
||||||
|
client = Groq(
|
||||||
|
api_key=KEY,
|
||||||
|
)
|
||||||
|
|
||||||
|
def create_queries(subject) :
|
||||||
|
|
||||||
|
chat_completion = client.chat.completions.create(
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": f"Generate 15 google scholar queries from this subject : \"{subject}\" Your response should only contain the queries, no title, no quotation marks, no numbers, one per line.",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
model="llama3-8b-8192",
|
||||||
|
)
|
||||||
|
|
||||||
|
log = ""
|
||||||
|
|
||||||
|
queries = chat_completion.choices[0].message.content.split("\n")
|
||||||
|
|
||||||
|
return queries, log
|
37
src/test.py
37
src/test.py
|
@ -1,25 +1,24 @@
|
||||||
import requests
|
from groq import Groq
|
||||||
|
|
||||||
subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"
|
client = Groq(
|
||||||
|
api_key=KEY,
|
||||||
|
)
|
||||||
|
|
||||||
searxng_url = "https://search.penwing.org/search"
|
def create_queries(subject) :
|
||||||
|
|
||||||
params = {
|
chat_completion = client.chat.completions.create(
|
||||||
"q": subject, # Your search query
|
messages=[
|
||||||
"format": "json", # Requesting JSON format
|
{
|
||||||
"categories": "science", # You can specify categories (optional)
|
"role": "user",
|
||||||
}
|
"content": f"Generate 15 google scholar queries from this subject : \"{subject}\" Your response should only contain the queries, no title, no quotation marks, no numbers, one per line.",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
model="llama3-8b-8192",
|
||||||
|
)
|
||||||
|
|
||||||
response = requests.get(searxng_url, params=params)
|
log = ""
|
||||||
|
|
||||||
if response.status_code == 200:
|
queries = chat_completion.choices[0].message.content.split("\n")
|
||||||
data = response.json()
|
print(queries)
|
||||||
|
|
||||||
# List to store results with similarity scores
|
return queries, log
|
||||||
scored_results = []
|
|
||||||
|
|
||||||
for result in data.get("results", []):
|
|
||||||
print(result['title'])
|
|
||||||
print("---")
|
|
||||||
else:
|
|
||||||
print(f"Error: {response.status_code}")
|
|
||||||
|
|
Loading…
Reference in a new issue