groq api

2024-09-30 12:24:26 +02:00 · 2024-09-30 12:24:26 +02:00 · 5ddeb26891
parent a266739b51
commit 5ddeb26891
8 changed files with 106 additions and 39 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 /models
 /logs/*
 /web_data/*
+api.py
--- a/3
+++ b/3
@ -14,6 +14,9 @@ nltk = "*"
 spacy = "*"
 numpy = "*"
 gensim = "*"
+scikit-learn = "*"
+pandas = "*"
+groq = "*"

 [dev-packages]

--- a/main.py
+++ b/main.py
@ -1,16 +1,11 @@
-import warnings
 from datetime import datetime
 import json
 import os

 from src.scrub import scrub_web
-from src.key import create_queries
+from src.key3 import create_queries
 from src.evaluate import sort_results, CLS_POOLING, MEAN_POOLING, MAX_POOLING

-# Suppress FutureWarnings and other warnings
-warnings.simplefilter(action='ignore', category=FutureWarning)
-
-
 def hin_fetch(subject, weights, pooling):
    current_time = datetime.now().strftime("%m-%d_%H-%M")
    data_path = f"web_data/{hash(subject)}.json"
@ -55,13 +50,16 @@ def hin_fetch(subject, weights, pooling):
        file.write(log_content + report)

 #subject = input("Enter subject : ")
-subject = "State of the art on the identification of wood structure natural frequencies. Influence of the mechanical properties and interest in sensitivity analysis as prospects for reverse identification method of wood elastic properties."
+#subject = "State of the art on the identification of wood structure natural frequencies. Influence of the mechanical properties and interest in sensitivity analysis as prospects for reverse identification method of wood elastic properties."
 #subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"
+#subject = "Dynamic response of carbon-epoxy laminates including a perforated viscoelastic film"
+subject = "tig welding of inconel 625 and influences on micro structures"
+#subject = "Thermo-Mechanical Impact of temperature oscillations on bonding and metallization for SiC MOSFETs soldered on ceramic substrate"

 # hin_fetch(subject, [title_weight, snippet_weight], [title_pooling, snippet_pooling])
-hin_fetch(subject, [1,0], [CLS_POOLING, MAX_POOLING])
-hin_fetch(subject, [1,0], [MEAN_POOLING,MAX_POOLING])
-hin_fetch(subject, [1,0], [MAX_POOLING, MAX_POOLING])
-hin_fetch(subject, [0,1], [CLS_POOLING, CLS_POOLING])
-hin_fetch(subject, [0,1], [CLS_POOLING, MEAN_POOLING])
-hin_fetch(subject, [0,1], [CLS_POOLING, MAX_POOLING])
+hin_fetch(subject, [2,1], [CLS_POOLING, MAX_POOLING])
+#hin_fetch(subject, [1,0], [MEAN_POOLING,MAX_POOLING])
+#hin_fetch(subject, [1,0], [MAX_POOLING, MAX_POOLING])
+#hin_fetch(subject, [0,1], [CLS_POOLING, CLS_POOLING])
+#hin_fetch(subject, [0,1], [CLS_POOLING, MEAN_POOLING])
+#hin_fetch(subject, [0,1], [CLS_POOLING, MAX_POOLING])
--- a/shell.nix
+++ b/shell.nix
@ -12,6 +12,6 @@ mkShell {
  shellHook = ''
      export LD_LIBRARY_PATH="${pkgs.stdenv.cc.cc.lib}/lib";
      export LD_LIBRARY_PATH="${pkgs.zlib}/lib:$LD_LIBRARY_PATH";
-      alias run="pipenv run python main.py; notify-send -u normal -a 'Hin' 'finished'"
+      alias hin="pipenv run python main.py; notify-send -u normal -a 'Hin' 'finished'"
  '';
 }
--- a/src/evaluate.py
+++ b/src/evaluate.py
@ -3,6 +3,10 @@ import torch
 import torch.nn.functional as F
 import progressbar
 import math
+import warnings
+
+# Suppress FutureWarning messages (other warning categories are still shown)
+warnings.simplefilter(action='ignore', category=FutureWarning)

 CLS_POOLING = 1
 MEAN_POOLING = 2
@ -66,7 +70,7 @@ def score_results(subject, results, weights, pooling):
    subject_model_output = get_subject_output(subject)
    print("* Tokenized subject\n")
    
-    scored_results_urls = []
+    scored_results_titles = []
    scored_results = []

    print("* Started scoring results\n")
@ -90,17 +94,17 @@ def score_results(subject, results, weights, pooling):
        progress += 1
        bar.update(progress)

+        if not ("content" in result) :
+            continue
+            
        title = result['title']
        url = result['url']
        snippet = result['content']
    
-        if title == subject :
-            found_original = True
-    
-        if url in scored_results_urls :
+        if title in scored_results_titles :
            continue
            
-        scored_results_urls.append(url)
+        scored_results_titles.append(title)
        
        # Compute similarity between subject and result

--- a/src/key2.py
+++ b/src/key2.py
@ -0,0 +1,38 @@
+from sklearn.feature_extraction.text import TfidfVectorizer
+from itertools import combinations
+
+vectorizer = TfidfVectorizer(stop_words='english')
+
+def create_queries(subject) :  # Build search queries from the top TF-IDF terms of `subject`; returns (queries, log)
+
+    print("\n### Getting Keywords ###\n")
+
+    tfidf_matrix = vectorizer.fit_transform([subject])  # the subject is the only document fitted
+
+    feature_names = vectorizer.get_feature_names_out()
+    
+    print("* Preparation done")
+
+    sorted_indices = tfidf_matrix[0].toarray()[0].argsort()[::-1]  # term indices, highest TF-IDF weight first
+
+    text_keywords = []
+
+    for i in range(5):  # Change 5 to however many keywords you want
+        if i < len(sorted_indices):  # guards against fewer than 5 terms being available
+            text_keywords.append(feature_names[sorted_indices[i]])
+
+    log = f"## Keywords\n\n{text_keywords}\n\n"
+
+    queries = []
+
+    for r in range(1, len(text_keywords) + 1):  # every non-empty combination of the keywords
+        comb = combinations(text_keywords, r)
+        queries.extend(comb)
+
+    final_queries = [subject] + ["\"" + "\" OR \"".join(query) + "\"" for query in queries]  # raw subject first, then each combination as quoted terms joined by OR
+
+    #final_queries.ins(subject)
+
+    print("* query generated")
+
+    return final_queries, log
--- a/src/key3.py
+++ b/src/key3.py
@ -0,0 +1,24 @@
+from groq import Groq
+from src.api import KEY
+
+client = Groq(
+    api_key=KEY,
+)
+
+def create_queries(subject) :  # Ask the Groq chat model for search queries about `subject`; returns (queries, log)
+
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": f"Generate 15 google scholar queries from this subject : \"{subject}\" Your response should only contain the queries, no title, no quotation marks, no numbers, one per line.",
+            }
+        ],
+        model="llama3-8b-8192",
+    )
+
+    log = ""  # this implementation produces no log text
+
+    queries = chat_completion.choices[0].message.content.split("\n")  # one query per reply line; NOTE(review): blank reply lines become empty queries - consider filtering
+
+    return queries, log
--- a/src/test.py
+++ b/src/test.py
@ -1,25 +1,24 @@
-import requests
+from groq import Groq

-subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"
+client = Groq(
+    api_key=KEY,  # NOTE(review): KEY is not imported or defined in this file as shown (src/key3.py gets it via `from src.api import KEY`) - this raises NameError
+)

-searxng_url = "https://search.penwing.org/search"
+def create_queries(subject) :  # Ask the Groq chat model for search queries about `subject`, print them, and return (queries, log)

-params = {
-    "q": subject,  # Your search query
-    "format": "json",         # Requesting JSON format
-    "categories": "science",  # You can specify categories (optional)
-}
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": f"Generate 15 google scholar queries from this subject : \"{subject}\" Your response should only contain the queries, no title, no quotation marks, no numbers, one per line.",
+            }
+        ],
+        model="llama3-8b-8192",
+    )

-response = requests.get(searxng_url, params=params)
+    log = ""  # this implementation produces no log text

-if response.status_code == 200:
-    data = response.json()
+    queries = chat_completion.choices[0].message.content.split("\n")  # one query per reply line; blank reply lines are kept as empty strings
+    print(queries)  # show the generated queries

-    # List to store results with similarity scores
-    scored_results = []
-
-    for result in data.get("results", []):
-        print(result['title'])
-        print("---")
-else:
-    print(f"Error: {response.status_code}")
+    return queries, log