groq api

2024-09-30 12:24:26 +02:00 · 2024-09-30 12:24:26 +02:00 · 5ddeb26891
parent a266739b51
commit 5ddeb26891
8 changed files with 106 additions and 39 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 /models
 /logs/*
 /web_data/*
 api.py
--- a/3
+++ b/3
@ -14,6 +14,9 @@ nltk = "*"
 spacy = "*"
 numpy = "*"
 gensim = "*"
 scikit-learn = "*"
 pandas = "*"
 groq = "*"
 [dev-packages]
--- a/main.py
+++ b/main.py
@ -1,16 +1,11 @@
 import warnings
 from datetime import datetime
 import json
 import os
 from src.scrub import scrub_web
-from src.key import create_queries
+from src.key3 import create_queries
 from src.evaluate import sort_results, CLS_POOLING, MEAN_POOLING, MAX_POOLING
 # Suppress FutureWarnings and other warnings
 warnings.simplefilter(action='ignore', category=FutureWarning)
 def hin_fetch(subject, weights, pooling):
    current_time = datetime.now().strftime("%m-%d_%H-%M")
    data_path = f"web_data/{hash(subject)}.json"
@ -55,13 +50,16 @@ def hin_fetch(subject, weights, pooling):
        file.write(log_content + report)
 #subject = input("Enter subject : ")
-subject = "State of the art on the identification of wood structure natural frequencies. Influence of the mechanical properties and interest in sensitivity analysis as prospects for reverse identification method of wood elastic properties."
+#subject = "State of the art on the identification of wood structure natural frequencies. Influence of the mechanical properties and interest in sensitivity analysis as prospects for reverse identification method of wood elastic properties."
 #subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"
 #subject = "Dynamic response of carbon-epoxy laminates including a perforated viscoelastic film"
 subject = "tig welding of inconel 625 and influences on micro structures"
 #subject = "Thermo-Mechanical Impact of temperature oscillations on bonding and metallization for SiC MOSFETs soldered on ceramic substrate"
 # hin_fetch(subject, [title_weight, snippet_weight], [title_pooling, snippet_pooling])
-hin_fetch(subject, [1,0], [CLS_POOLING, MAX_POOLING])
+hin_fetch(subject, [2,1], [CLS_POOLING, MAX_POOLING])
-hin_fetch(subject, [1,0], [MEAN_POOLING,MAX_POOLING])
+#hin_fetch(subject, [1,0], [MEAN_POOLING,MAX_POOLING])
-hin_fetch(subject, [1,0], [MAX_POOLING, MAX_POOLING])
+#hin_fetch(subject, [1,0], [MAX_POOLING, MAX_POOLING])
-hin_fetch(subject, [0,1], [CLS_POOLING, CLS_POOLING])
+#hin_fetch(subject, [0,1], [CLS_POOLING, CLS_POOLING])
-hin_fetch(subject, [0,1], [CLS_POOLING, MEAN_POOLING])
+#hin_fetch(subject, [0,1], [CLS_POOLING, MEAN_POOLING])
-hin_fetch(subject, [0,1], [CLS_POOLING, MAX_POOLING])
+#hin_fetch(subject, [0,1], [CLS_POOLING, MAX_POOLING])
--- a/shell.nix
+++ b/shell.nix
@ -12,6 +12,6 @@ mkShell {
  shellHook = ''
      export LD_LIBRARY_PATH="${pkgs.stdenv.cc.cc.lib}/lib";
      export LD_LIBRARY_PATH="${pkgs.zlib}/lib:$LD_LIBRARY_PATH";
-      alias run="pipenv run python main.py; notify-send -u normal -a 'Hin' 'finished'"
+      alias hin="pipenv run python main.py; notify-send -u normal -a 'Hin' 'finished'"
  '';
 }
--- a/src/evaluate.py
+++ b/src/evaluate.py
@ -3,6 +3,10 @@ import torch
 import torch.nn.functional as F
 import progressbar
 import math
 import warnings
 # Suppress FutureWarnings and other warnings
 warnings.simplefilter(action='ignore', category=FutureWarning)
 CLS_POOLING = 1
 MEAN_POOLING = 2
@ -66,7 +70,7 @@ def score_results(subject, results, weights, pooling):
    subject_model_output = get_subject_output(subject)
    print("* Tokenized subject\n")
-    scored_results_urls = []
+    scored_results_titles = []
    scored_results = []
    print("* Started scoring results\n")
@ -90,17 +94,17 @@ def score_results(subject, results, weights, pooling):
        progress += 1
        bar.update(progress)
        if not ("content" in result) :
            continue
        title = result['title']
        url = result['url']
        snippet = result['content']
-        if title == subject :
+        if title in scored_results_titles :
            found_original = True
        if url in scored_results_urls :
            continue
-        scored_results_urls.append(url)
+        scored_results_titles.append(title)
        # Compute similarity between subject and result
--- a/src/key2.py
+++ b/src/key2.py
@ -0,0 +1,38 @@
 from sklearn.feature_extraction.text import TfidfVectorizer
 from itertools import combinations
 # Module-level TF-IDF vectorizer shared by every create_queries() call;
 # English stop words are dropped before terms are weighted. It is re-fitted
 # on each call, so no vocabulary carries over between subjects.
 vectorizer = TfidfVectorizer(stop_words='english')
 def create_queries(subject, max_keywords=5):
    """Build search queries from the highest-weighted terms of ``subject``.

    The subject is vectorized on its own with the module-level ``vectorizer``
    and its top ``max_keywords`` terms are kept. Every non-empty combination
    of those terms becomes one query of the form ``"a" OR "b" OR ...``, and
    the raw subject is always the first query.

    Args:
        subject: Free-text description of the topic to search for.
        max_keywords: Upper bound on the number of keywords extracted.
            The number of generated queries grows as ``2**k`` (k keywords
            give ``2**k - 1`` combinations plus the subject itself), so keep
            this small. Negative values are treated as 0.

    Returns:
        A ``(final_queries, log)`` tuple: the list of query strings and a
        Markdown snippet listing the extracted keywords.
    """
    print("\n### Getting Keywords ###\n")

    try:
        tfidf_matrix = vectorizer.fit_transform([subject])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when the
        # subject is empty or made only of stop words. Fall back to
        # searching for the raw subject instead of crashing the pipeline.
        text_keywords = []
    else:
        feature_names = vectorizer.get_feature_names_out()
        print("* Preparation done")

        # Highest weight first. NOTE(review): with a single fitted document
        # the IDF factor is the same for every term, so this ranking is
        # effectively by term frequency - confirm that is what is wanted.
        sorted_indices = tfidf_matrix[0].toarray()[0].argsort()[::-1]
        # Slicing copes with subjects that have fewer terms than requested.
        text_keywords = [
            feature_names[idx] for idx in sorted_indices[:max(max_keywords, 0)]
        ]

    log = f"## Keywords\n\n{text_keywords}\n\n"

    # All combinations of size 1..k, smallest groups first.
    queries = []
    for size in range(1, len(text_keywords) + 1):
        queries.extend(combinations(text_keywords, size))

    # Quote each keyword and OR them together; the subject itself goes first.
    final_queries = [subject] + [
        "\"" + "\" OR \"".join(query) + "\"" for query in queries
    ]

    print("* query generated")
    return final_queries, log
--- a/src/key3.py
+++ b/src/key3.py
@ -0,0 +1,24 @@
 from groq import Groq
 from src.api import KEY
 # Groq API client created once at import time and reused by create_queries().
 # KEY is imported from src.api; NOTE(review): api.py is listed in .gitignore,
 # so each checkout presumably has to provide its own copy - confirm.
 client = Groq(
    api_key=KEY,
 )
 def create_queries(subject) :
    """Ask the Groq chat model for search queries about ``subject``.

    A single user message asks the model for 15 Google Scholar queries, one
    per line. Nothing enforces that count or format, so the reply is split
    into lines and cleaned up rather than trusted as-is.

    Args:
        subject: Free-text description of the topic to search for.

    Returns:
        A ``(queries, log)`` tuple. ``queries`` holds the non-empty,
        whitespace-stripped lines of the model's reply (possibly an empty
        list). ``log`` is always an empty string; it is kept so the return
        shape matches the other ``create_queries`` implementation.
    """
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"Generate 15 google scholar queries from this subject : \"{subject}\" Your response should only contain the queries, no title, no quotation marks, no numbers, one per line.",
            }
        ],
        model="llama3-8b-8192",
    )

    log = ""

    # The message content may be missing; treat that as an empty reply.
    reply = chat_completion.choices[0].message.content or ""

    # splitlines() handles "\n" and "\r\n" alike; blank or whitespace-only
    # lines are dropped so no empty query is sent to the search backend.
    queries = [line.strip() for line in reply.splitlines() if line.strip()]

    return queries, log
--- a/src/test.py
+++ b/src/test.py
@ -1,25 +1,24 @@
-import requests
+from groq import Groq
-subject = "Experiments, numerical models and optimization of carbon-epoxy plates damped by a frequency-dependent interleaved viscoelastic layer"
+client = Groq(
    api_key=KEY,
 )
-searxng_url = "https://search.penwing.org/search"
+def create_queries(subject) :
-params = {
+    chat_completion = client.chat.completions.create(
-    "q": subject,  # Your search query
+        messages=[
-    "format": "json",         # Requesting JSON format
+            {
-    "categories": "science",  # You can specify categories (optional)
+                "role": "user",
-}
+                "content": f"Generate 15 google scholar queries from this subject : \"{subject}\" Your response should only contain the queries, no title, no quotation marks, no numbers, one per line.",
            }
        ],
        model="llama3-8b-8192",
    )
-response = requests.get(searxng_url, params=params)
+    log = ""
-if response.status_code == 200:
+    queries = chat_completion.choices[0].message.content.split("\n")
-    data = response.json()
+    print(queries)
-    # List to store results with similarity scores
+    return queries, log
    scored_results = []
    for result in data.get("results", []):
        print(result['title'])
        print("---")
 else:
    print(f"Error: {response.status_code}")